Actual source code: baij.c
1: /*
2: Defines the basic matrix operations for the BAIJ (compressed row)
3: matrix storage format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/kernels/blockinvert.h>
8: #include <petsc/private/kernels/blockmatmult.h>
10: /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11: #define TYPE BAIJ
12: #define TYPE_BS
13: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14: #undef TYPE_BS
15: #define TYPE_BS _BS
16: #define TYPE_BS_ON
17: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18: #undef TYPE_BS
19: #include "../src/mat/impls/aij/seq/seqhashmat.h"
20: #undef TYPE
21: #undef TYPE_BS_ON
23: #if defined(PETSC_HAVE_HYPRE)
24: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25: #endif
27: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29: #endif
30: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32: MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
34: static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35: {
36: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37: PetscInt m, n, ib, jb, bs = A->rmap->bs;
38: MatScalar *a_val = a_aij->a;
40: PetscFunctionBegin;
41: PetscCall(MatGetSize(A, &m, &n));
42: PetscCall(PetscArrayzero(reductions, n));
43: if (type == NORM_2) {
44: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
45: for (jb = 0; jb < bs; jb++) {
46: for (ib = 0; ib < bs; ib++) {
47: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
48: a_val++;
49: }
50: }
51: }
52: } else if (type == NORM_1) {
53: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
54: for (jb = 0; jb < bs; jb++) {
55: for (ib = 0; ib < bs; ib++) {
56: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
57: a_val++;
58: }
59: }
60: }
61: } else if (type == NORM_INFINITY) {
62: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
63: for (jb = 0; jb < bs; jb++) {
64: for (ib = 0; ib < bs; ib++) {
65: PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
66: reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
67: a_val++;
68: }
69: }
70: }
71: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73: for (jb = 0; jb < bs; jb++) {
74: for (ib = 0; ib < bs; ib++) {
75: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76: a_val++;
77: }
78: }
79: }
80: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82: for (jb = 0; jb < bs; jb++) {
83: for (ib = 0; ib < bs; ib++) {
84: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85: a_val++;
86: }
87: }
88: }
89: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
90: if (type == NORM_2) {
91: for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93: for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
94: }
95: PetscFunctionReturn(PETSC_SUCCESS);
96: }
98: static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99: {
100: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
101: PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
102: MatScalar *v = a->a, *odiag, *diag, work[25], *v_work;
103: PetscReal shift = 0.0;
104: PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
105: const PetscInt *adiag;
107: PetscFunctionBegin;
108: allowzeropivot = PetscNot(A->erroriffailure);
110: if (a->idiagvalid) {
111: if (values) *values = a->idiag;
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
114: PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
115: if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116: diag = a->idiag;
117: if (values) *values = a->idiag;
118: /* factor and invert each block */
119: switch (bs) {
120: case 1:
121: for (i = 0; i < mbs; i++) {
122: odiag = v + 1 * adiag[i];
123: diag[0] = odiag[0];
125: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126: PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
127: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
129: A->factorerror_zeropivot_row = i;
130: PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131: }
133: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134: diag += 1;
135: }
136: break;
137: case 2:
138: for (i = 0; i < mbs; i++) {
139: odiag = v + 4 * adiag[i];
140: diag[0] = odiag[0];
141: diag[1] = odiag[1];
142: diag[2] = odiag[2];
143: diag[3] = odiag[3];
144: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146: diag += 4;
147: }
148: break;
149: case 3:
150: for (i = 0; i < mbs; i++) {
151: odiag = v + 9 * adiag[i];
152: diag[0] = odiag[0];
153: diag[1] = odiag[1];
154: diag[2] = odiag[2];
155: diag[3] = odiag[3];
156: diag[4] = odiag[4];
157: diag[5] = odiag[5];
158: diag[6] = odiag[6];
159: diag[7] = odiag[7];
160: diag[8] = odiag[8];
161: PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163: diag += 9;
164: }
165: break;
166: case 4:
167: for (i = 0; i < mbs; i++) {
168: odiag = v + 16 * adiag[i];
169: PetscCall(PetscArraycpy(diag, odiag, 16));
170: PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172: diag += 16;
173: }
174: break;
175: case 5:
176: for (i = 0; i < mbs; i++) {
177: odiag = v + 25 * adiag[i];
178: PetscCall(PetscArraycpy(diag, odiag, 25));
179: PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181: diag += 25;
182: }
183: break;
184: case 6:
185: for (i = 0; i < mbs; i++) {
186: odiag = v + 36 * adiag[i];
187: PetscCall(PetscArraycpy(diag, odiag, 36));
188: PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190: diag += 36;
191: }
192: break;
193: case 7:
194: for (i = 0; i < mbs; i++) {
195: odiag = v + 49 * adiag[i];
196: PetscCall(PetscArraycpy(diag, odiag, 49));
197: PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199: diag += 49;
200: }
201: break;
202: default:
203: PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204: for (i = 0; i < mbs; i++) {
205: odiag = v + bs2 * adiag[i];
206: PetscCall(PetscArraycpy(diag, odiag, bs2));
207: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209: diag += bs2;
210: }
211: PetscCall(PetscFree2(v_work, v_pivots));
212: }
213: a->idiagvalid = PETSC_TRUE;
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
217: static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218: {
219: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
220: PetscScalar *x, *work, *w, *workt, *t;
221: const MatScalar *v, *aa = a->a, *idiag;
222: const PetscScalar *b, *xb;
223: PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224: PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225: const PetscInt *diag, *ai = a->i, *aj = a->j, *vi;
227: PetscFunctionBegin;
228: its = its * lits;
229: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
235: if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
237: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238: diag = a->diag;
239: idiag = a->idiag;
240: k = PetscMax(A->rmap->n, A->cmap->n);
241: if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242: if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243: if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244: work = a->mult_work;
245: t = a->sor_workt;
246: w = a->sor_work;
248: PetscCall(VecGetArray(xx, &x));
249: PetscCall(VecGetArrayRead(bb, &b));
251: if (flag & SOR_ZERO_INITIAL_GUESS) {
252: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253: switch (bs) {
254: case 1:
255: PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256: t[0] = b[0];
257: i2 = 1;
258: idiag += 1;
259: for (i = 1; i < m; i++) {
260: v = aa + ai[i];
261: vi = aj + ai[i];
262: nz = diag[i] - ai[i];
263: s[0] = b[i2];
264: for (j = 0; j < nz; j++) {
265: xw[0] = x[vi[j]];
266: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267: }
268: t[i2] = s[0];
269: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270: x[i2] = xw[0];
271: idiag += 1;
272: i2 += 1;
273: }
274: break;
275: case 2:
276: PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277: t[0] = b[0];
278: t[1] = b[1];
279: i2 = 2;
280: idiag += 4;
281: for (i = 1; i < m; i++) {
282: v = aa + 4 * ai[i];
283: vi = aj + ai[i];
284: nz = diag[i] - ai[i];
285: s[0] = b[i2];
286: s[1] = b[i2 + 1];
287: for (j = 0; j < nz; j++) {
288: idx = 2 * vi[j];
289: it = 4 * j;
290: xw[0] = x[idx];
291: xw[1] = x[1 + idx];
292: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293: }
294: t[i2] = s[0];
295: t[i2 + 1] = s[1];
296: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297: x[i2] = xw[0];
298: x[i2 + 1] = xw[1];
299: idiag += 4;
300: i2 += 2;
301: }
302: break;
303: case 3:
304: PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305: t[0] = b[0];
306: t[1] = b[1];
307: t[2] = b[2];
308: i2 = 3;
309: idiag += 9;
310: for (i = 1; i < m; i++) {
311: v = aa + 9 * ai[i];
312: vi = aj + ai[i];
313: nz = diag[i] - ai[i];
314: s[0] = b[i2];
315: s[1] = b[i2 + 1];
316: s[2] = b[i2 + 2];
317: while (nz--) {
318: idx = 3 * (*vi++);
319: xw[0] = x[idx];
320: xw[1] = x[1 + idx];
321: xw[2] = x[2 + idx];
322: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323: v += 9;
324: }
325: t[i2] = s[0];
326: t[i2 + 1] = s[1];
327: t[i2 + 2] = s[2];
328: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329: x[i2] = xw[0];
330: x[i2 + 1] = xw[1];
331: x[i2 + 2] = xw[2];
332: idiag += 9;
333: i2 += 3;
334: }
335: break;
336: case 4:
337: PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338: t[0] = b[0];
339: t[1] = b[1];
340: t[2] = b[2];
341: t[3] = b[3];
342: i2 = 4;
343: idiag += 16;
344: for (i = 1; i < m; i++) {
345: v = aa + 16 * ai[i];
346: vi = aj + ai[i];
347: nz = diag[i] - ai[i];
348: s[0] = b[i2];
349: s[1] = b[i2 + 1];
350: s[2] = b[i2 + 2];
351: s[3] = b[i2 + 3];
352: while (nz--) {
353: idx = 4 * (*vi++);
354: xw[0] = x[idx];
355: xw[1] = x[1 + idx];
356: xw[2] = x[2 + idx];
357: xw[3] = x[3 + idx];
358: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359: v += 16;
360: }
361: t[i2] = s[0];
362: t[i2 + 1] = s[1];
363: t[i2 + 2] = s[2];
364: t[i2 + 3] = s[3];
365: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366: x[i2] = xw[0];
367: x[i2 + 1] = xw[1];
368: x[i2 + 2] = xw[2];
369: x[i2 + 3] = xw[3];
370: idiag += 16;
371: i2 += 4;
372: }
373: break;
374: case 5:
375: PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376: t[0] = b[0];
377: t[1] = b[1];
378: t[2] = b[2];
379: t[3] = b[3];
380: t[4] = b[4];
381: i2 = 5;
382: idiag += 25;
383: for (i = 1; i < m; i++) {
384: v = aa + 25 * ai[i];
385: vi = aj + ai[i];
386: nz = diag[i] - ai[i];
387: s[0] = b[i2];
388: s[1] = b[i2 + 1];
389: s[2] = b[i2 + 2];
390: s[3] = b[i2 + 3];
391: s[4] = b[i2 + 4];
392: while (nz--) {
393: idx = 5 * (*vi++);
394: xw[0] = x[idx];
395: xw[1] = x[1 + idx];
396: xw[2] = x[2 + idx];
397: xw[3] = x[3 + idx];
398: xw[4] = x[4 + idx];
399: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400: v += 25;
401: }
402: t[i2] = s[0];
403: t[i2 + 1] = s[1];
404: t[i2 + 2] = s[2];
405: t[i2 + 3] = s[3];
406: t[i2 + 4] = s[4];
407: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408: x[i2] = xw[0];
409: x[i2 + 1] = xw[1];
410: x[i2 + 2] = xw[2];
411: x[i2 + 3] = xw[3];
412: x[i2 + 4] = xw[4];
413: idiag += 25;
414: i2 += 5;
415: }
416: break;
417: case 6:
418: PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419: t[0] = b[0];
420: t[1] = b[1];
421: t[2] = b[2];
422: t[3] = b[3];
423: t[4] = b[4];
424: t[5] = b[5];
425: i2 = 6;
426: idiag += 36;
427: for (i = 1; i < m; i++) {
428: v = aa + 36 * ai[i];
429: vi = aj + ai[i];
430: nz = diag[i] - ai[i];
431: s[0] = b[i2];
432: s[1] = b[i2 + 1];
433: s[2] = b[i2 + 2];
434: s[3] = b[i2 + 3];
435: s[4] = b[i2 + 4];
436: s[5] = b[i2 + 5];
437: while (nz--) {
438: idx = 6 * (*vi++);
439: xw[0] = x[idx];
440: xw[1] = x[1 + idx];
441: xw[2] = x[2 + idx];
442: xw[3] = x[3 + idx];
443: xw[4] = x[4 + idx];
444: xw[5] = x[5 + idx];
445: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446: v += 36;
447: }
448: t[i2] = s[0];
449: t[i2 + 1] = s[1];
450: t[i2 + 2] = s[2];
451: t[i2 + 3] = s[3];
452: t[i2 + 4] = s[4];
453: t[i2 + 5] = s[5];
454: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455: x[i2] = xw[0];
456: x[i2 + 1] = xw[1];
457: x[i2 + 2] = xw[2];
458: x[i2 + 3] = xw[3];
459: x[i2 + 4] = xw[4];
460: x[i2 + 5] = xw[5];
461: idiag += 36;
462: i2 += 6;
463: }
464: break;
465: case 7:
466: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467: t[0] = b[0];
468: t[1] = b[1];
469: t[2] = b[2];
470: t[3] = b[3];
471: t[4] = b[4];
472: t[5] = b[5];
473: t[6] = b[6];
474: i2 = 7;
475: idiag += 49;
476: for (i = 1; i < m; i++) {
477: v = aa + 49 * ai[i];
478: vi = aj + ai[i];
479: nz = diag[i] - ai[i];
480: s[0] = b[i2];
481: s[1] = b[i2 + 1];
482: s[2] = b[i2 + 2];
483: s[3] = b[i2 + 3];
484: s[4] = b[i2 + 4];
485: s[5] = b[i2 + 5];
486: s[6] = b[i2 + 6];
487: while (nz--) {
488: idx = 7 * (*vi++);
489: xw[0] = x[idx];
490: xw[1] = x[1 + idx];
491: xw[2] = x[2 + idx];
492: xw[3] = x[3 + idx];
493: xw[4] = x[4 + idx];
494: xw[5] = x[5 + idx];
495: xw[6] = x[6 + idx];
496: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497: v += 49;
498: }
499: t[i2] = s[0];
500: t[i2 + 1] = s[1];
501: t[i2 + 2] = s[2];
502: t[i2 + 3] = s[3];
503: t[i2 + 4] = s[4];
504: t[i2 + 5] = s[5];
505: t[i2 + 6] = s[6];
506: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507: x[i2] = xw[0];
508: x[i2 + 1] = xw[1];
509: x[i2 + 2] = xw[2];
510: x[i2 + 3] = xw[3];
511: x[i2 + 4] = xw[4];
512: x[i2 + 5] = xw[5];
513: x[i2 + 6] = xw[6];
514: idiag += 49;
515: i2 += 7;
516: }
517: break;
518: default:
519: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520: PetscCall(PetscArraycpy(t, b, bs));
521: i2 = bs;
522: idiag += bs2;
523: for (i = 1; i < m; i++) {
524: v = aa + bs2 * ai[i];
525: vi = aj + ai[i];
526: nz = diag[i] - ai[i];
528: PetscCall(PetscArraycpy(w, b + i2, bs));
529: /* copy all rows of x that are needed into contiguous space */
530: workt = work;
531: for (j = 0; j < nz; j++) {
532: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533: workt += bs;
534: }
535: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536: PetscCall(PetscArraycpy(t + i2, w, bs));
537: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
539: idiag += bs2;
540: i2 += bs;
541: }
542: break;
543: }
544: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546: xb = t;
547: } else xb = b;
548: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549: idiag = a->idiag + bs2 * (a->mbs - 1);
550: i2 = bs * (m - 1);
551: switch (bs) {
552: case 1:
553: s[0] = xb[i2];
554: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555: x[i2] = xw[0];
556: i2 -= 1;
557: for (i = m - 2; i >= 0; i--) {
558: v = aa + (diag[i] + 1);
559: vi = aj + diag[i] + 1;
560: nz = ai[i + 1] - diag[i] - 1;
561: s[0] = xb[i2];
562: for (j = 0; j < nz; j++) {
563: xw[0] = x[vi[j]];
564: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565: }
566: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567: x[i2] = xw[0];
568: idiag -= 1;
569: i2 -= 1;
570: }
571: break;
572: case 2:
573: s[0] = xb[i2];
574: s[1] = xb[i2 + 1];
575: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576: x[i2] = xw[0];
577: x[i2 + 1] = xw[1];
578: i2 -= 2;
579: idiag -= 4;
580: for (i = m - 2; i >= 0; i--) {
581: v = aa + 4 * (diag[i] + 1);
582: vi = aj + diag[i] + 1;
583: nz = ai[i + 1] - diag[i] - 1;
584: s[0] = xb[i2];
585: s[1] = xb[i2 + 1];
586: for (j = 0; j < nz; j++) {
587: idx = 2 * vi[j];
588: it = 4 * j;
589: xw[0] = x[idx];
590: xw[1] = x[1 + idx];
591: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592: }
593: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594: x[i2] = xw[0];
595: x[i2 + 1] = xw[1];
596: idiag -= 4;
597: i2 -= 2;
598: }
599: break;
600: case 3:
601: s[0] = xb[i2];
602: s[1] = xb[i2 + 1];
603: s[2] = xb[i2 + 2];
604: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605: x[i2] = xw[0];
606: x[i2 + 1] = xw[1];
607: x[i2 + 2] = xw[2];
608: i2 -= 3;
609: idiag -= 9;
610: for (i = m - 2; i >= 0; i--) {
611: v = aa + 9 * (diag[i] + 1);
612: vi = aj + diag[i] + 1;
613: nz = ai[i + 1] - diag[i] - 1;
614: s[0] = xb[i2];
615: s[1] = xb[i2 + 1];
616: s[2] = xb[i2 + 2];
617: while (nz--) {
618: idx = 3 * (*vi++);
619: xw[0] = x[idx];
620: xw[1] = x[1 + idx];
621: xw[2] = x[2 + idx];
622: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623: v += 9;
624: }
625: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626: x[i2] = xw[0];
627: x[i2 + 1] = xw[1];
628: x[i2 + 2] = xw[2];
629: idiag -= 9;
630: i2 -= 3;
631: }
632: break;
633: case 4:
634: s[0] = xb[i2];
635: s[1] = xb[i2 + 1];
636: s[2] = xb[i2 + 2];
637: s[3] = xb[i2 + 3];
638: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639: x[i2] = xw[0];
640: x[i2 + 1] = xw[1];
641: x[i2 + 2] = xw[2];
642: x[i2 + 3] = xw[3];
643: i2 -= 4;
644: idiag -= 16;
645: for (i = m - 2; i >= 0; i--) {
646: v = aa + 16 * (diag[i] + 1);
647: vi = aj + diag[i] + 1;
648: nz = ai[i + 1] - diag[i] - 1;
649: s[0] = xb[i2];
650: s[1] = xb[i2 + 1];
651: s[2] = xb[i2 + 2];
652: s[3] = xb[i2 + 3];
653: while (nz--) {
654: idx = 4 * (*vi++);
655: xw[0] = x[idx];
656: xw[1] = x[1 + idx];
657: xw[2] = x[2 + idx];
658: xw[3] = x[3 + idx];
659: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660: v += 16;
661: }
662: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663: x[i2] = xw[0];
664: x[i2 + 1] = xw[1];
665: x[i2 + 2] = xw[2];
666: x[i2 + 3] = xw[3];
667: idiag -= 16;
668: i2 -= 4;
669: }
670: break;
671: case 5:
672: s[0] = xb[i2];
673: s[1] = xb[i2 + 1];
674: s[2] = xb[i2 + 2];
675: s[3] = xb[i2 + 3];
676: s[4] = xb[i2 + 4];
677: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678: x[i2] = xw[0];
679: x[i2 + 1] = xw[1];
680: x[i2 + 2] = xw[2];
681: x[i2 + 3] = xw[3];
682: x[i2 + 4] = xw[4];
683: i2 -= 5;
684: idiag -= 25;
685: for (i = m - 2; i >= 0; i--) {
686: v = aa + 25 * (diag[i] + 1);
687: vi = aj + diag[i] + 1;
688: nz = ai[i + 1] - diag[i] - 1;
689: s[0] = xb[i2];
690: s[1] = xb[i2 + 1];
691: s[2] = xb[i2 + 2];
692: s[3] = xb[i2 + 3];
693: s[4] = xb[i2 + 4];
694: while (nz--) {
695: idx = 5 * (*vi++);
696: xw[0] = x[idx];
697: xw[1] = x[1 + idx];
698: xw[2] = x[2 + idx];
699: xw[3] = x[3 + idx];
700: xw[4] = x[4 + idx];
701: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702: v += 25;
703: }
704: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705: x[i2] = xw[0];
706: x[i2 + 1] = xw[1];
707: x[i2 + 2] = xw[2];
708: x[i2 + 3] = xw[3];
709: x[i2 + 4] = xw[4];
710: idiag -= 25;
711: i2 -= 5;
712: }
713: break;
714: case 6:
715: s[0] = xb[i2];
716: s[1] = xb[i2 + 1];
717: s[2] = xb[i2 + 2];
718: s[3] = xb[i2 + 3];
719: s[4] = xb[i2 + 4];
720: s[5] = xb[i2 + 5];
721: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722: x[i2] = xw[0];
723: x[i2 + 1] = xw[1];
724: x[i2 + 2] = xw[2];
725: x[i2 + 3] = xw[3];
726: x[i2 + 4] = xw[4];
727: x[i2 + 5] = xw[5];
728: i2 -= 6;
729: idiag -= 36;
730: for (i = m - 2; i >= 0; i--) {
731: v = aa + 36 * (diag[i] + 1);
732: vi = aj + diag[i] + 1;
733: nz = ai[i + 1] - diag[i] - 1;
734: s[0] = xb[i2];
735: s[1] = xb[i2 + 1];
736: s[2] = xb[i2 + 2];
737: s[3] = xb[i2 + 3];
738: s[4] = xb[i2 + 4];
739: s[5] = xb[i2 + 5];
740: while (nz--) {
741: idx = 6 * (*vi++);
742: xw[0] = x[idx];
743: xw[1] = x[1 + idx];
744: xw[2] = x[2 + idx];
745: xw[3] = x[3 + idx];
746: xw[4] = x[4 + idx];
747: xw[5] = x[5 + idx];
748: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749: v += 36;
750: }
751: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752: x[i2] = xw[0];
753: x[i2 + 1] = xw[1];
754: x[i2 + 2] = xw[2];
755: x[i2 + 3] = xw[3];
756: x[i2 + 4] = xw[4];
757: x[i2 + 5] = xw[5];
758: idiag -= 36;
759: i2 -= 6;
760: }
761: break;
762: case 7:
763: s[0] = xb[i2];
764: s[1] = xb[i2 + 1];
765: s[2] = xb[i2 + 2];
766: s[3] = xb[i2 + 3];
767: s[4] = xb[i2 + 4];
768: s[5] = xb[i2 + 5];
769: s[6] = xb[i2 + 6];
770: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771: x[i2] = xw[0];
772: x[i2 + 1] = xw[1];
773: x[i2 + 2] = xw[2];
774: x[i2 + 3] = xw[3];
775: x[i2 + 4] = xw[4];
776: x[i2 + 5] = xw[5];
777: x[i2 + 6] = xw[6];
778: i2 -= 7;
779: idiag -= 49;
780: for (i = m - 2; i >= 0; i--) {
781: v = aa + 49 * (diag[i] + 1);
782: vi = aj + diag[i] + 1;
783: nz = ai[i + 1] - diag[i] - 1;
784: s[0] = xb[i2];
785: s[1] = xb[i2 + 1];
786: s[2] = xb[i2 + 2];
787: s[3] = xb[i2 + 3];
788: s[4] = xb[i2 + 4];
789: s[5] = xb[i2 + 5];
790: s[6] = xb[i2 + 6];
791: while (nz--) {
792: idx = 7 * (*vi++);
793: xw[0] = x[idx];
794: xw[1] = x[1 + idx];
795: xw[2] = x[2 + idx];
796: xw[3] = x[3 + idx];
797: xw[4] = x[4 + idx];
798: xw[5] = x[5 + idx];
799: xw[6] = x[6 + idx];
800: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801: v += 49;
802: }
803: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804: x[i2] = xw[0];
805: x[i2 + 1] = xw[1];
806: x[i2 + 2] = xw[2];
807: x[i2 + 3] = xw[3];
808: x[i2 + 4] = xw[4];
809: x[i2 + 5] = xw[5];
810: x[i2 + 6] = xw[6];
811: idiag -= 49;
812: i2 -= 7;
813: }
814: break;
815: default:
816: PetscCall(PetscArraycpy(w, xb + i2, bs));
817: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818: i2 -= bs;
819: idiag -= bs2;
820: for (i = m - 2; i >= 0; i--) {
821: v = aa + bs2 * (diag[i] + 1);
822: vi = aj + diag[i] + 1;
823: nz = ai[i + 1] - diag[i] - 1;
825: PetscCall(PetscArraycpy(w, xb + i2, bs));
826: /* copy all rows of x that are needed into contiguous space */
827: workt = work;
828: for (j = 0; j < nz; j++) {
829: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830: workt += bs;
831: }
832: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
835: idiag -= bs2;
836: i2 -= bs;
837: }
838: break;
839: }
840: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841: }
842: its--;
843: }
844: while (its--) {
845: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846: idiag = a->idiag;
847: i2 = 0;
848: switch (bs) {
849: case 1:
850: for (i = 0; i < m; i++) {
851: v = aa + ai[i];
852: vi = aj + ai[i];
853: nz = ai[i + 1] - ai[i];
854: s[0] = b[i2];
855: for (j = 0; j < nz; j++) {
856: xw[0] = x[vi[j]];
857: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858: }
859: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860: x[i2] += xw[0];
861: idiag += 1;
862: i2 += 1;
863: }
864: break;
865: case 2:
866: for (i = 0; i < m; i++) {
867: v = aa + 4 * ai[i];
868: vi = aj + ai[i];
869: nz = ai[i + 1] - ai[i];
870: s[0] = b[i2];
871: s[1] = b[i2 + 1];
872: for (j = 0; j < nz; j++) {
873: idx = 2 * vi[j];
874: it = 4 * j;
875: xw[0] = x[idx];
876: xw[1] = x[1 + idx];
877: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878: }
879: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880: x[i2] += xw[0];
881: x[i2 + 1] += xw[1];
882: idiag += 4;
883: i2 += 2;
884: }
885: break;
886: case 3:
887: for (i = 0; i < m; i++) {
888: v = aa + 9 * ai[i];
889: vi = aj + ai[i];
890: nz = ai[i + 1] - ai[i];
891: s[0] = b[i2];
892: s[1] = b[i2 + 1];
893: s[2] = b[i2 + 2];
894: while (nz--) {
895: idx = 3 * (*vi++);
896: xw[0] = x[idx];
897: xw[1] = x[1 + idx];
898: xw[2] = x[2 + idx];
899: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900: v += 9;
901: }
902: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903: x[i2] += xw[0];
904: x[i2 + 1] += xw[1];
905: x[i2 + 2] += xw[2];
906: idiag += 9;
907: i2 += 3;
908: }
909: break;
910: case 4:
911: for (i = 0; i < m; i++) {
912: v = aa + 16 * ai[i];
913: vi = aj + ai[i];
914: nz = ai[i + 1] - ai[i];
915: s[0] = b[i2];
916: s[1] = b[i2 + 1];
917: s[2] = b[i2 + 2];
918: s[3] = b[i2 + 3];
919: while (nz--) {
920: idx = 4 * (*vi++);
921: xw[0] = x[idx];
922: xw[1] = x[1 + idx];
923: xw[2] = x[2 + idx];
924: xw[3] = x[3 + idx];
925: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926: v += 16;
927: }
928: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929: x[i2] += xw[0];
930: x[i2 + 1] += xw[1];
931: x[i2 + 2] += xw[2];
932: x[i2 + 3] += xw[3];
933: idiag += 16;
934: i2 += 4;
935: }
936: break;
937: case 5:
938: for (i = 0; i < m; i++) {
939: v = aa + 25 * ai[i];
940: vi = aj + ai[i];
941: nz = ai[i + 1] - ai[i];
942: s[0] = b[i2];
943: s[1] = b[i2 + 1];
944: s[2] = b[i2 + 2];
945: s[3] = b[i2 + 3];
946: s[4] = b[i2 + 4];
947: while (nz--) {
948: idx = 5 * (*vi++);
949: xw[0] = x[idx];
950: xw[1] = x[1 + idx];
951: xw[2] = x[2 + idx];
952: xw[3] = x[3 + idx];
953: xw[4] = x[4 + idx];
954: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955: v += 25;
956: }
957: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958: x[i2] += xw[0];
959: x[i2 + 1] += xw[1];
960: x[i2 + 2] += xw[2];
961: x[i2 + 3] += xw[3];
962: x[i2 + 4] += xw[4];
963: idiag += 25;
964: i2 += 5;
965: }
966: break;
967: case 6:
968: for (i = 0; i < m; i++) {
969: v = aa + 36 * ai[i];
970: vi = aj + ai[i];
971: nz = ai[i + 1] - ai[i];
972: s[0] = b[i2];
973: s[1] = b[i2 + 1];
974: s[2] = b[i2 + 2];
975: s[3] = b[i2 + 3];
976: s[4] = b[i2 + 4];
977: s[5] = b[i2 + 5];
978: while (nz--) {
979: idx = 6 * (*vi++);
980: xw[0] = x[idx];
981: xw[1] = x[1 + idx];
982: xw[2] = x[2 + idx];
983: xw[3] = x[3 + idx];
984: xw[4] = x[4 + idx];
985: xw[5] = x[5 + idx];
986: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987: v += 36;
988: }
989: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990: x[i2] += xw[0];
991: x[i2 + 1] += xw[1];
992: x[i2 + 2] += xw[2];
993: x[i2 + 3] += xw[3];
994: x[i2 + 4] += xw[4];
995: x[i2 + 5] += xw[5];
996: idiag += 36;
997: i2 += 6;
998: }
999: break;
1000: case 7:
1001: for (i = 0; i < m; i++) {
1002: v = aa + 49 * ai[i];
1003: vi = aj + ai[i];
1004: nz = ai[i + 1] - ai[i];
1005: s[0] = b[i2];
1006: s[1] = b[i2 + 1];
1007: s[2] = b[i2 + 2];
1008: s[3] = b[i2 + 3];
1009: s[4] = b[i2 + 4];
1010: s[5] = b[i2 + 5];
1011: s[6] = b[i2 + 6];
1012: while (nz--) {
1013: idx = 7 * (*vi++);
1014: xw[0] = x[idx];
1015: xw[1] = x[1 + idx];
1016: xw[2] = x[2 + idx];
1017: xw[3] = x[3 + idx];
1018: xw[4] = x[4 + idx];
1019: xw[5] = x[5 + idx];
1020: xw[6] = x[6 + idx];
1021: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022: v += 49;
1023: }
1024: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025: x[i2] += xw[0];
1026: x[i2 + 1] += xw[1];
1027: x[i2 + 2] += xw[2];
1028: x[i2 + 3] += xw[3];
1029: x[i2 + 4] += xw[4];
1030: x[i2 + 5] += xw[5];
1031: x[i2 + 6] += xw[6];
1032: idiag += 49;
1033: i2 += 7;
1034: }
1035: break;
1036: default:
1037: for (i = 0; i < m; i++) {
1038: v = aa + bs2 * ai[i];
1039: vi = aj + ai[i];
1040: nz = ai[i + 1] - ai[i];
1042: PetscCall(PetscArraycpy(w, b + i2, bs));
1043: /* copy all rows of x that are needed into contiguous space */
1044: workt = work;
1045: for (j = 0; j < nz; j++) {
1046: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047: workt += bs;
1048: }
1049: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1052: idiag += bs2;
1053: i2 += bs;
1054: }
1055: break;
1056: }
1057: PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058: }
1059: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060: idiag = a->idiag + bs2 * (a->mbs - 1);
1061: i2 = bs * (m - 1);
1062: switch (bs) {
1063: case 1:
1064: for (i = m - 1; i >= 0; i--) {
1065: v = aa + ai[i];
1066: vi = aj + ai[i];
1067: nz = ai[i + 1] - ai[i];
1068: s[0] = b[i2];
1069: for (j = 0; j < nz; j++) {
1070: xw[0] = x[vi[j]];
1071: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072: }
1073: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074: x[i2] += xw[0];
1075: idiag -= 1;
1076: i2 -= 1;
1077: }
1078: break;
1079: case 2:
1080: for (i = m - 1; i >= 0; i--) {
1081: v = aa + 4 * ai[i];
1082: vi = aj + ai[i];
1083: nz = ai[i + 1] - ai[i];
1084: s[0] = b[i2];
1085: s[1] = b[i2 + 1];
1086: for (j = 0; j < nz; j++) {
1087: idx = 2 * vi[j];
1088: it = 4 * j;
1089: xw[0] = x[idx];
1090: xw[1] = x[1 + idx];
1091: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092: }
1093: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094: x[i2] += xw[0];
1095: x[i2 + 1] += xw[1];
1096: idiag -= 4;
1097: i2 -= 2;
1098: }
1099: break;
1100: case 3:
1101: for (i = m - 1; i >= 0; i--) {
1102: v = aa + 9 * ai[i];
1103: vi = aj + ai[i];
1104: nz = ai[i + 1] - ai[i];
1105: s[0] = b[i2];
1106: s[1] = b[i2 + 1];
1107: s[2] = b[i2 + 2];
1108: while (nz--) {
1109: idx = 3 * (*vi++);
1110: xw[0] = x[idx];
1111: xw[1] = x[1 + idx];
1112: xw[2] = x[2 + idx];
1113: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114: v += 9;
1115: }
1116: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117: x[i2] += xw[0];
1118: x[i2 + 1] += xw[1];
1119: x[i2 + 2] += xw[2];
1120: idiag -= 9;
1121: i2 -= 3;
1122: }
1123: break;
1124: case 4:
1125: for (i = m - 1; i >= 0; i--) {
1126: v = aa + 16 * ai[i];
1127: vi = aj + ai[i];
1128: nz = ai[i + 1] - ai[i];
1129: s[0] = b[i2];
1130: s[1] = b[i2 + 1];
1131: s[2] = b[i2 + 2];
1132: s[3] = b[i2 + 3];
1133: while (nz--) {
1134: idx = 4 * (*vi++);
1135: xw[0] = x[idx];
1136: xw[1] = x[1 + idx];
1137: xw[2] = x[2 + idx];
1138: xw[3] = x[3 + idx];
1139: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140: v += 16;
1141: }
1142: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143: x[i2] += xw[0];
1144: x[i2 + 1] += xw[1];
1145: x[i2 + 2] += xw[2];
1146: x[i2 + 3] += xw[3];
1147: idiag -= 16;
1148: i2 -= 4;
1149: }
1150: break;
1151: case 5:
1152: for (i = m - 1; i >= 0; i--) {
1153: v = aa + 25 * ai[i];
1154: vi = aj + ai[i];
1155: nz = ai[i + 1] - ai[i];
1156: s[0] = b[i2];
1157: s[1] = b[i2 + 1];
1158: s[2] = b[i2 + 2];
1159: s[3] = b[i2 + 3];
1160: s[4] = b[i2 + 4];
1161: while (nz--) {
1162: idx = 5 * (*vi++);
1163: xw[0] = x[idx];
1164: xw[1] = x[1 + idx];
1165: xw[2] = x[2 + idx];
1166: xw[3] = x[3 + idx];
1167: xw[4] = x[4 + idx];
1168: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169: v += 25;
1170: }
1171: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172: x[i2] += xw[0];
1173: x[i2 + 1] += xw[1];
1174: x[i2 + 2] += xw[2];
1175: x[i2 + 3] += xw[3];
1176: x[i2 + 4] += xw[4];
1177: idiag -= 25;
1178: i2 -= 5;
1179: }
1180: break;
1181: case 6:
1182: for (i = m - 1; i >= 0; i--) {
1183: v = aa + 36 * ai[i];
1184: vi = aj + ai[i];
1185: nz = ai[i + 1] - ai[i];
1186: s[0] = b[i2];
1187: s[1] = b[i2 + 1];
1188: s[2] = b[i2 + 2];
1189: s[3] = b[i2 + 3];
1190: s[4] = b[i2 + 4];
1191: s[5] = b[i2 + 5];
1192: while (nz--) {
1193: idx = 6 * (*vi++);
1194: xw[0] = x[idx];
1195: xw[1] = x[1 + idx];
1196: xw[2] = x[2 + idx];
1197: xw[3] = x[3 + idx];
1198: xw[4] = x[4 + idx];
1199: xw[5] = x[5 + idx];
1200: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201: v += 36;
1202: }
1203: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204: x[i2] += xw[0];
1205: x[i2 + 1] += xw[1];
1206: x[i2 + 2] += xw[2];
1207: x[i2 + 3] += xw[3];
1208: x[i2 + 4] += xw[4];
1209: x[i2 + 5] += xw[5];
1210: idiag -= 36;
1211: i2 -= 6;
1212: }
1213: break;
1214: case 7:
1215: for (i = m - 1; i >= 0; i--) {
1216: v = aa + 49 * ai[i];
1217: vi = aj + ai[i];
1218: nz = ai[i + 1] - ai[i];
1219: s[0] = b[i2];
1220: s[1] = b[i2 + 1];
1221: s[2] = b[i2 + 2];
1222: s[3] = b[i2 + 3];
1223: s[4] = b[i2 + 4];
1224: s[5] = b[i2 + 5];
1225: s[6] = b[i2 + 6];
1226: while (nz--) {
1227: idx = 7 * (*vi++);
1228: xw[0] = x[idx];
1229: xw[1] = x[1 + idx];
1230: xw[2] = x[2 + idx];
1231: xw[3] = x[3 + idx];
1232: xw[4] = x[4 + idx];
1233: xw[5] = x[5 + idx];
1234: xw[6] = x[6 + idx];
1235: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236: v += 49;
1237: }
1238: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239: x[i2] += xw[0];
1240: x[i2 + 1] += xw[1];
1241: x[i2 + 2] += xw[2];
1242: x[i2 + 3] += xw[3];
1243: x[i2 + 4] += xw[4];
1244: x[i2 + 5] += xw[5];
1245: x[i2 + 6] += xw[6];
1246: idiag -= 49;
1247: i2 -= 7;
1248: }
1249: break;
1250: default:
1251: for (i = m - 1; i >= 0; i--) {
1252: v = aa + bs2 * ai[i];
1253: vi = aj + ai[i];
1254: nz = ai[i + 1] - ai[i];
1256: PetscCall(PetscArraycpy(w, b + i2, bs));
1257: /* copy all rows of x that are needed into contiguous space */
1258: workt = work;
1259: for (j = 0; j < nz; j++) {
1260: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261: workt += bs;
1262: }
1263: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1266: idiag -= bs2;
1267: i2 -= bs;
1268: }
1269: break;
1270: }
1271: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272: }
1273: }
1274: PetscCall(VecRestoreArray(xx, &x));
1275: PetscCall(VecRestoreArrayRead(bb, &b));
1276: PetscFunctionReturn(PETSC_SUCCESS);
1277: }
1279: /*
1280: Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281: */
1282: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285: #define matsetvaluesblocked4_ matsetvaluesblocked4
1286: #endif
1288: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289: {
1290: Mat A = *AA;
1291: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1292: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293: PetscInt *ai = a->i, *ailen = a->ilen;
1294: PetscInt *aj = a->j, stepval, lastcol = -1;
1295: const PetscScalar *value = v;
1296: MatScalar *ap, *aa = a->a, *bap;
1298: PetscFunctionBegin;
1299: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300: stepval = (n - 1) * 4;
1301: for (k = 0; k < m; k++) { /* loop over added rows */
1302: row = im[k];
1303: rp = aj + ai[row];
1304: ap = aa + 16 * ai[row];
1305: nrow = ailen[row];
1306: low = 0;
1307: high = nrow;
1308: for (l = 0; l < n; l++) { /* loop over added columns */
1309: col = in[l];
1310: if (col <= lastcol) low = 0;
1311: else high = nrow;
1312: lastcol = col;
1313: value = v + k * (stepval + 4 + l) * 4;
1314: while (high - low > 7) {
1315: t = (low + high) / 2;
1316: if (rp[t] > col) high = t;
1317: else low = t;
1318: }
1319: for (i = low; i < high; i++) {
1320: if (rp[i] > col) break;
1321: if (rp[i] == col) {
1322: bap = ap + 16 * i;
1323: for (ii = 0; ii < 4; ii++, value += stepval) {
1324: for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325: }
1326: goto noinsert2;
1327: }
1328: }
1329: N = nrow++ - 1;
1330: high++; /* added new column index thus must search to one higher than before */
1331: /* shift up all the later entries in this row */
1332: for (ii = N; ii >= i; ii--) {
1333: rp[ii + 1] = rp[ii];
1334: PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335: }
1336: if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337: rp[i] = col;
1338: bap = ap + 16 * i;
1339: for (ii = 0; ii < 4; ii++, value += stepval) {
1340: for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341: }
1342: noinsert2:;
1343: low = i;
1344: }
1345: ailen[row] = nrow;
1346: }
1347: PetscFunctionReturnVoid();
1348: }
1350: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351: #define matsetvalues4_ MATSETVALUES4
1352: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353: #define matsetvalues4_ matsetvalues4
1354: #endif
1356: PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357: {
1358: Mat A = *AA;
1359: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361: PetscInt *ai = a->i, *ailen = a->ilen;
1362: PetscInt *aj = a->j, brow, bcol;
1363: PetscInt ridx, cidx, lastcol = -1;
1364: MatScalar *ap, value, *aa = a->a, *bap;
1366: PetscFunctionBegin;
1367: for (k = 0; k < m; k++) { /* loop over added rows */
1368: row = im[k];
1369: brow = row / 4;
1370: rp = aj + ai[brow];
1371: ap = aa + 16 * ai[brow];
1372: nrow = ailen[brow];
1373: low = 0;
1374: high = nrow;
1375: for (l = 0; l < n; l++) { /* loop over added columns */
1376: col = in[l];
1377: bcol = col / 4;
1378: ridx = row % 4;
1379: cidx = col % 4;
1380: value = v[l + k * n];
1381: if (col <= lastcol) low = 0;
1382: else high = nrow;
1383: lastcol = col;
1384: while (high - low > 7) {
1385: t = (low + high) / 2;
1386: if (rp[t] > bcol) high = t;
1387: else low = t;
1388: }
1389: for (i = low; i < high; i++) {
1390: if (rp[i] > bcol) break;
1391: if (rp[i] == bcol) {
1392: bap = ap + 16 * i + 4 * cidx + ridx;
1393: *bap += value;
1394: goto noinsert1;
1395: }
1396: }
1397: N = nrow++ - 1;
1398: high++; /* added new column thus must search to one higher than before */
1399: /* shift up all the later entries in this row */
1400: PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401: PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402: PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403: rp[i] = bcol;
1404: ap[16 * i + 4 * cidx + ridx] = value;
1405: noinsert1:;
1406: low = i;
1407: }
1408: ailen[brow] = nrow;
1409: }
1410: PetscFunctionReturnVoid();
1411: }
1413: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414: {
1415: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1416: PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1417: PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1419: PetscFunctionBegin;
1420: *nn = n;
1421: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1422: if (symmetric) {
1423: PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424: nz = tia[n];
1425: } else {
1426: tia = a->i;
1427: tja = a->j;
1428: }
1430: if (!blockcompressed && bs > 1) {
1431: (*nn) *= bs;
1432: /* malloc & create the natural set of indices */
1433: PetscCall(PetscMalloc1((n + 1) * bs, ia));
1434: if (n) {
1435: (*ia)[0] = oshift;
1436: for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1437: }
1439: for (i = 1; i < n; i++) {
1440: (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441: for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1442: }
1443: if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1445: if (inja) {
1446: PetscCall(PetscMalloc1(nz * bs * bs, ja));
1447: cnt = 0;
1448: for (i = 0; i < n; i++) {
1449: for (j = 0; j < bs; j++) {
1450: for (k = tia[i]; k < tia[i + 1]; k++) {
1451: for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1452: }
1453: }
1454: }
1455: }
1457: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1458: PetscCall(PetscFree(tia));
1459: PetscCall(PetscFree(tja));
1460: }
1461: } else if (oshift == 1) {
1462: if (symmetric) {
1463: nz = tia[A->rmap->n / bs];
1464: /* add 1 to i and j indices */
1465: for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466: *ia = tia;
1467: if (ja) {
1468: for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469: *ja = tja;
1470: }
1471: } else {
1472: nz = a->i[A->rmap->n / bs];
1473: /* malloc space and add 1 to i and j indices */
1474: PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475: for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476: if (ja) {
1477: PetscCall(PetscMalloc1(nz, ja));
1478: for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479: }
1480: }
1481: } else {
1482: *ia = tia;
1483: if (ja) *ja = tja;
1484: }
1485: PetscFunctionReturn(PETSC_SUCCESS);
1486: }
1488: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489: {
1490: PetscFunctionBegin;
1491: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1493: PetscCall(PetscFree(*ia));
1494: if (ja) PetscCall(PetscFree(*ja));
1495: }
1496: PetscFunctionReturn(PETSC_SUCCESS);
1497: }
1499: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500: {
1501: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1503: PetscFunctionBegin;
1504: if (A->hash_active) {
1505: PetscInt bs;
1506: A->ops[0] = a->cops;
1507: PetscCall(PetscHMapIJVDestroy(&a->ht));
1508: PetscCall(MatGetBlockSize(A, &bs));
1509: if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510: PetscCall(PetscFree(a->dnz));
1511: PetscCall(PetscFree(a->bdnz));
1512: A->hash_active = PETSC_FALSE;
1513: }
1514: PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1515: PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1516: PetscCall(ISDestroy(&a->row));
1517: PetscCall(ISDestroy(&a->col));
1518: PetscCall(PetscFree(a->diag));
1519: PetscCall(PetscFree(a->idiag));
1520: if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1521: PetscCall(PetscFree(a->solve_work));
1522: PetscCall(PetscFree(a->mult_work));
1523: PetscCall(PetscFree(a->sor_workt));
1524: PetscCall(PetscFree(a->sor_work));
1525: PetscCall(ISDestroy(&a->icol));
1526: PetscCall(PetscFree(a->saved_values));
1527: PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1529: PetscCall(MatDestroy(&a->sbaijMat));
1530: PetscCall(MatDestroy(&a->parent));
1531: PetscCall(PetscFree(A->data));
1533: PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1534: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1535: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1536: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1537: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1538: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1539: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1540: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1541: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1542: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1543: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1544: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1545: #if defined(PETSC_HAVE_HYPRE)
1546: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1547: #endif
1548: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1549: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1550: PetscFunctionReturn(PETSC_SUCCESS);
1551: }
1553: static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554: {
1555: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1557: PetscFunctionBegin;
1558: switch (op) {
1559: case MAT_ROW_ORIENTED:
1560: a->roworiented = flg;
1561: break;
1562: case MAT_KEEP_NONZERO_PATTERN:
1563: a->keepnonzeropattern = flg;
1564: break;
1565: case MAT_NEW_NONZERO_LOCATIONS:
1566: a->nonew = (flg ? 0 : 1);
1567: break;
1568: case MAT_NEW_NONZERO_LOCATION_ERR:
1569: a->nonew = (flg ? -1 : 0);
1570: break;
1571: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572: a->nonew = (flg ? -2 : 0);
1573: break;
1574: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575: a->nounused = (flg ? -1 : 0);
1576: break;
1577: default:
1578: break;
1579: }
1580: PetscFunctionReturn(PETSC_SUCCESS);
1581: }
1583: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1584: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585: {
1586: PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1587: MatScalar *aa_i;
1588: PetscScalar *v_i;
1590: PetscFunctionBegin;
1591: bs = A->rmap->bs;
1592: bs2 = bs * bs;
1593: PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1595: bn = row / bs; /* Block number */
1596: bp = row % bs; /* Block Position */
1597: M = ai[bn + 1] - ai[bn];
1598: *nz = bs * M;
1600: if (v) {
1601: *v = NULL;
1602: if (*nz) {
1603: PetscCall(PetscMalloc1(*nz, v));
1604: for (i = 0; i < M; i++) { /* for each block in the block row */
1605: v_i = *v + i * bs;
1606: aa_i = aa + bs2 * (ai[bn] + i);
1607: for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1608: }
1609: }
1610: }
1612: if (idx) {
1613: *idx = NULL;
1614: if (*nz) {
1615: PetscCall(PetscMalloc1(*nz, idx));
1616: for (i = 0; i < M; i++) { /* for each block in the block row */
1617: idx_i = *idx + i * bs;
1618: itmp = bs * aj[ai[bn] + i];
1619: for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1620: }
1621: }
1622: }
1623: PetscFunctionReturn(PETSC_SUCCESS);
1624: }
1626: PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627: {
1628: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1630: PetscFunctionBegin;
1631: PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1632: PetscFunctionReturn(PETSC_SUCCESS);
1633: }
1635: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636: {
1637: PetscFunctionBegin;
1638: if (idx) PetscCall(PetscFree(*idx));
1639: if (v) PetscCall(PetscFree(*v));
1640: PetscFunctionReturn(PETSC_SUCCESS);
1641: }
1643: static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644: {
1645: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1646: Mat C;
1647: PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1648: PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr;
1649: MatScalar *ata, *aa = a->a;
1651: PetscFunctionBegin;
1652: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1653: PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1655: for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1657: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1658: PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1659: PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1660: PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1662: at = (Mat_SeqBAIJ *)C->data;
1663: ati = at->i;
1664: for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665: } else {
1666: C = *B;
1667: at = (Mat_SeqBAIJ *)C->data;
1668: ati = at->i;
1669: }
1671: atj = at->j;
1672: ata = at->a;
1674: /* Copy ati into atfill so we have locations of the next free space in atj */
1675: PetscCall(PetscArraycpy(atfill, ati, nbs));
1677: /* Walk through A row-wise and mark nonzero entries of A^T. */
1678: for (i = 0; i < mbs; i++) {
1679: anzj = ai[i + 1] - ai[i];
1680: for (j = 0; j < anzj; j++) {
1681: atj[atfill[*aj]] = i;
1682: for (kr = 0; kr < bs; kr++) {
1683: for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1684: }
1685: atfill[*aj++] += 1;
1686: }
1687: }
1688: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1689: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1691: /* Clean up temporary space and complete requests. */
1692: PetscCall(PetscFree(atfill));
1694: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1695: PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1696: *B = C;
1697: } else {
1698: PetscCall(MatHeaderMerge(A, &C));
1699: }
1700: PetscFunctionReturn(PETSC_SUCCESS);
1701: }
1703: static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1704: {
1705: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1707: PetscFunctionBegin;
1708: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1709: if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1710: *flg = PETSC_FALSE;
1711: PetscFunctionReturn(PETSC_SUCCESS);
1712: }
1714: /* if the a->i are the same */
1715: PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1716: if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1718: /* if a->j are the same */
1719: PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1720: if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1722: if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1723: else {
1724: *flg = PETSC_TRUE;
1725: for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1726: if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1727: }
1728: PetscFunctionReturn(PETSC_SUCCESS);
1729: }
1731: static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732: {
1733: Mat Btrans;
1735: PetscFunctionBegin;
1736: PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1737: PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
1738: PetscCall(MatDestroy(&Btrans));
1739: PetscFunctionReturn(PETSC_SUCCESS);
1740: }
1742: static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1743: {
1744: PetscFunctionBegin;
1745: PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1746: PetscFunctionReturn(PETSC_SUCCESS);
1747: }
1749: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1750: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751: {
1752: Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753: PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754: PetscInt *rowlens, *colidxs;
1755: PetscScalar *matvals;
1757: PetscFunctionBegin;
1758: PetscCall(PetscViewerSetUp(viewer));
1760: M = mat->rmap->N;
1761: N = mat->cmap->N;
1762: m = mat->rmap->n;
1763: bs = mat->rmap->bs;
1764: nz = bs * bs * A->nz;
1766: /* write matrix header */
1767: header[0] = MAT_FILE_CLASSID;
1768: header[1] = M;
1769: header[2] = N;
1770: header[3] = nz;
1771: PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1773: /* store row lengths */
1774: PetscCall(PetscMalloc1(m, &rowlens));
1775: for (cnt = 0, i = 0; i < A->mbs; i++)
1776: for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1777: PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1778: PetscCall(PetscFree(rowlens));
1780: /* store column indices */
1781: PetscCall(PetscMalloc1(nz, &colidxs));
1782: for (cnt = 0, i = 0; i < A->mbs; i++)
1783: for (k = 0; k < bs; k++)
1784: for (j = A->i[i]; j < A->i[i + 1]; j++)
1785: for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1786: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1787: PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1788: PetscCall(PetscFree(colidxs));
1790: /* store nonzero values */
1791: PetscCall(PetscMalloc1(nz, &matvals));
1792: for (cnt = 0, i = 0; i < A->mbs; i++)
1793: for (k = 0; k < bs; k++)
1794: for (j = A->i[i]; j < A->i[i + 1]; j++)
1795: for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1796: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1797: PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1798: PetscCall(PetscFree(matvals));
1800: /* write block size option to the viewer's .info file */
1801: PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1802: PetscFunctionReturn(PETSC_SUCCESS);
1803: }
1805: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806: {
1807: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1808: PetscInt i, bs = A->rmap->bs, k;
1810: PetscFunctionBegin;
1811: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1812: for (i = 0; i < a->mbs; i++) {
1813: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1814: for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1815: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1816: }
1817: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1818: PetscFunctionReturn(PETSC_SUCCESS);
1819: }
1821: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822: {
1823: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1824: PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825: PetscViewerFormat format;
1827: PetscFunctionBegin;
1828: if (A->structure_only) {
1829: PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1830: PetscFunctionReturn(PETSC_SUCCESS);
1831: }
1833: PetscCall(PetscViewerGetFormat(viewer, &format));
1834: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835: PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
1836: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1837: const char *matname;
1838: Mat aij;
1839: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1840: PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1841: PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1842: PetscCall(MatView(aij, viewer));
1843: PetscCall(MatDestroy(&aij));
1844: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1845: PetscFunctionReturn(PETSC_SUCCESS);
1846: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1847: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1848: for (i = 0; i < a->mbs; i++) {
1849: for (j = 0; j < bs; j++) {
1850: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1851: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1852: for (l = 0; l < bs; l++) {
1853: #if defined(PETSC_USE_COMPLEX)
1854: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1855: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1856: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1857: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1858: } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1859: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1860: }
1861: #else
1862: if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1863: #endif
1864: }
1865: }
1866: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1867: }
1868: }
1869: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1870: } else {
1871: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1872: for (i = 0; i < a->mbs; i++) {
1873: for (j = 0; j < bs; j++) {
1874: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1875: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1876: for (l = 0; l < bs; l++) {
1877: #if defined(PETSC_USE_COMPLEX)
1878: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1879: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1880: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1881: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1882: } else {
1883: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1884: }
1885: #else
1886: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1887: #endif
1888: }
1889: }
1890: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1891: }
1892: }
1893: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1894: }
1895: PetscCall(PetscViewerFlush(viewer));
1896: PetscFunctionReturn(PETSC_SUCCESS);
1897: }
1899: #include <petscdraw.h>
1900: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1901: {
1902: Mat A = (Mat)Aa;
1903: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1904: PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1905: PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1906: MatScalar *aa;
1907: PetscViewer viewer;
1908: PetscViewerFormat format;
1909: int color;
1911: PetscFunctionBegin;
1912: PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1913: PetscCall(PetscViewerGetFormat(viewer, &format));
1914: PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1916: /* loop over matrix elements drawing boxes */
1918: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1919: PetscDrawCollectiveBegin(draw);
1920: /* Blue for negative, Cyan for zero and Red for positive */
1921: color = PETSC_DRAW_BLUE;
1922: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1923: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1924: y_l = A->rmap->N - row - 1.0;
1925: y_r = y_l + 1.0;
1926: x_l = a->j[j] * bs;
1927: x_r = x_l + 1.0;
1928: aa = a->a + j * bs2;
1929: for (k = 0; k < bs; k++) {
1930: for (l = 0; l < bs; l++) {
1931: if (PetscRealPart(*aa++) >= 0.) continue;
1932: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1933: }
1934: }
1935: }
1936: }
1937: color = PETSC_DRAW_CYAN;
1938: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1939: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1940: y_l = A->rmap->N - row - 1.0;
1941: y_r = y_l + 1.0;
1942: x_l = a->j[j] * bs;
1943: x_r = x_l + 1.0;
1944: aa = a->a + j * bs2;
1945: for (k = 0; k < bs; k++) {
1946: for (l = 0; l < bs; l++) {
1947: if (PetscRealPart(*aa++) != 0.) continue;
1948: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1949: }
1950: }
1951: }
1952: }
1953: color = PETSC_DRAW_RED;
1954: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1955: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1956: y_l = A->rmap->N - row - 1.0;
1957: y_r = y_l + 1.0;
1958: x_l = a->j[j] * bs;
1959: x_r = x_l + 1.0;
1960: aa = a->a + j * bs2;
1961: for (k = 0; k < bs; k++) {
1962: for (l = 0; l < bs; l++) {
1963: if (PetscRealPart(*aa++) <= 0.) continue;
1964: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1965: }
1966: }
1967: }
1968: }
1969: PetscDrawCollectiveEnd(draw);
1970: } else {
1971: /* use contour shading to indicate magnitude of values */
1972: /* first determine max of all nonzero values */
1973: PetscReal minv = 0.0, maxv = 0.0;
1974: PetscDraw popup;
1976: for (i = 0; i < a->nz * a->bs2; i++) {
1977: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1978: }
1979: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1980: PetscCall(PetscDrawGetPopup(draw, &popup));
1981: PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1983: PetscDrawCollectiveBegin(draw);
1984: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1985: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1986: y_l = A->rmap->N - row - 1.0;
1987: y_r = y_l + 1.0;
1988: x_l = a->j[j] * bs;
1989: x_r = x_l + 1.0;
1990: aa = a->a + j * bs2;
1991: for (k = 0; k < bs; k++) {
1992: for (l = 0; l < bs; l++) {
1993: MatScalar v = *aa++;
1994: color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1995: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1996: }
1997: }
1998: }
1999: }
2000: PetscDrawCollectiveEnd(draw);
2001: }
2002: PetscFunctionReturn(PETSC_SUCCESS);
2003: }
2005: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2006: {
2007: PetscReal xl, yl, xr, yr, w, h;
2008: PetscDraw draw;
2009: PetscBool isnull;
2011: PetscFunctionBegin;
2012: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2013: PetscCall(PetscDrawIsNull(draw, &isnull));
2014: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2016: xr = A->cmap->n;
2017: yr = A->rmap->N;
2018: h = yr / 10.0;
2019: w = xr / 10.0;
2020: xr += w;
2021: yr += h;
2022: xl = -w;
2023: yl = -h;
2024: PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2025: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2026: PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2027: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2028: PetscCall(PetscDrawSave(draw));
2029: PetscFunctionReturn(PETSC_SUCCESS);
2030: }
2032: PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2033: {
2034: PetscBool isascii, isbinary, isdraw;
2036: PetscFunctionBegin;
2037: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2038: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2039: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2040: if (isascii) {
2041: PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2042: } else if (isbinary) {
2043: PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2044: } else if (isdraw) {
2045: PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2046: } else {
2047: Mat B;
2048: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2049: PetscCall(MatView(B, viewer));
2050: PetscCall(MatDestroy(&B));
2051: }
2052: PetscFunctionReturn(PETSC_SUCCESS);
2053: }
2055: PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2056: {
2057: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2058: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2059: PetscInt *ai = a->i, *ailen = a->ilen;
2060: PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2061: MatScalar *ap, *aa = a->a;
2063: PetscFunctionBegin;
2064: for (k = 0; k < m; k++) { /* loop over rows */
2065: row = im[k];
2066: brow = row / bs;
2067: if (row < 0) {
2068: v += n;
2069: continue;
2070: } /* negative row */
2071: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2072: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2073: ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2074: nrow = ailen[brow];
2075: for (l = 0; l < n; l++) { /* loop over columns */
2076: if (in[l] < 0) {
2077: v++;
2078: continue;
2079: } /* negative column */
2080: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2081: col = in[l];
2082: bcol = col / bs;
2083: cidx = col % bs;
2084: ridx = row % bs;
2085: high = nrow;
2086: low = 0; /* assume unsorted */
2087: while (high - low > 5) {
2088: t = (low + high) / 2;
2089: if (rp[t] > bcol) high = t;
2090: else low = t;
2091: }
2092: for (i = low; i < high; i++) {
2093: if (rp[i] > bcol) break;
2094: if (rp[i] == bcol) {
2095: *v++ = ap[bs2 * i + bs * cidx + ridx];
2096: goto finished;
2097: }
2098: }
2099: *v++ = 0.0;
2100: finished:;
2101: }
2102: }
2103: PetscFunctionReturn(PETSC_SUCCESS);
2104: }
2106: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2107: {
2108: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2109: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2110: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2111: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2112: PetscBool roworiented = a->roworiented;
2113: const PetscScalar *value = v;
2114: MatScalar *ap = NULL, *aa = a->a, *bap;
2116: PetscFunctionBegin;
2117: if (roworiented) {
2118: stepval = (n - 1) * bs;
2119: } else {
2120: stepval = (m - 1) * bs;
2121: }
2122: for (k = 0; k < m; k++) { /* loop over added rows */
2123: row = im[k];
2124: if (row < 0) continue;
2125: PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2126: rp = aj + ai[row];
2127: if (!A->structure_only) ap = aa + bs2 * ai[row];
2128: rmax = imax[row];
2129: nrow = ailen[row];
2130: low = 0;
2131: high = nrow;
2132: for (l = 0; l < n; l++) { /* loop over added columns */
2133: if (in[l] < 0) continue;
2134: PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2135: col = in[l];
2136: if (!A->structure_only) {
2137: if (roworiented) {
2138: value = v + (k * (stepval + bs) + l) * bs;
2139: } else {
2140: value = v + (l * (stepval + bs) + k) * bs;
2141: }
2142: }
2143: if (col <= lastcol) low = 0;
2144: else high = nrow;
2145: lastcol = col;
2146: while (high - low > 7) {
2147: t = (low + high) / 2;
2148: if (rp[t] > col) high = t;
2149: else low = t;
2150: }
2151: for (i = low; i < high; i++) {
2152: if (rp[i] > col) break;
2153: if (rp[i] == col) {
2154: if (A->structure_only) goto noinsert2;
2155: bap = ap + bs2 * i;
2156: if (roworiented) {
2157: if (is == ADD_VALUES) {
2158: for (ii = 0; ii < bs; ii++, value += stepval) {
2159: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2160: }
2161: } else {
2162: for (ii = 0; ii < bs; ii++, value += stepval) {
2163: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2164: }
2165: }
2166: } else {
2167: if (is == ADD_VALUES) {
2168: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2169: for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2170: bap += bs;
2171: }
2172: } else {
2173: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2174: for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2175: bap += bs;
2176: }
2177: }
2178: }
2179: goto noinsert2;
2180: }
2181: }
2182: if (nonew == 1) goto noinsert2;
2183: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2184: if (A->structure_only) {
2185: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2186: } else {
2187: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2188: }
2189: N = nrow++ - 1;
2190: high++;
2191: /* shift up all the later entries in this row */
2192: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2193: rp[i] = col;
2194: if (!A->structure_only) {
2195: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2196: bap = ap + bs2 * i;
2197: if (roworiented) {
2198: for (ii = 0; ii < bs; ii++, value += stepval) {
2199: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2200: }
2201: } else {
2202: for (ii = 0; ii < bs; ii++, value += stepval) {
2203: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2204: }
2205: }
2206: }
2207: noinsert2:;
2208: low = i;
2209: }
2210: ailen[row] = nrow;
2211: }
2212: PetscFunctionReturn(PETSC_SUCCESS);
2213: }
2215: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2216: {
2217: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2218: PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2219: PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen;
2220: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2221: MatScalar *aa = a->a, *ap;
2222: PetscReal ratio = 0.6;
2224: PetscFunctionBegin;
2225: if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2227: if (m) rmax = ailen[0];
2228: for (i = 1; i < mbs; i++) {
2229: /* move each row back by the amount of empty slots (fshift) before it*/
2230: fshift += imax[i - 1] - ailen[i - 1];
2231: rmax = PetscMax(rmax, ailen[i]);
2232: if (fshift) {
2233: ip = aj + ai[i];
2234: ap = aa + bs2 * ai[i];
2235: N = ailen[i];
2236: PetscCall(PetscArraymove(ip - fshift, ip, N));
2237: if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2238: }
2239: ai[i] = ai[i - 1] + ailen[i - 1];
2240: }
2241: if (mbs) {
2242: fshift += imax[mbs - 1] - ailen[mbs - 1];
2243: ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2244: }
2246: /* reset ilen and imax for each row */
2247: a->nonzerorowcnt = 0;
2248: if (A->structure_only) {
2249: PetscCall(PetscFree2(a->imax, a->ilen));
2250: } else { /* !A->structure_only */
2251: for (i = 0; i < mbs; i++) {
2252: ailen[i] = imax[i] = ai[i + 1] - ai[i];
2253: a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2254: }
2255: }
2256: a->nz = ai[mbs];
2258: /* diagonals may have moved, so kill the diagonal pointers */
2259: a->idiagvalid = PETSC_FALSE;
2260: if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2261: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2262: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2263: PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2265: A->info.mallocs += a->reallocs;
2266: a->reallocs = 0;
2267: A->info.nz_unneeded = (PetscReal)fshift * bs2;
2268: a->rmax = rmax;
2270: if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2271: PetscFunctionReturn(PETSC_SUCCESS);
2272: }
2274: /*
2275: This function returns an array of flags which indicate the locations of contiguous
2276: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2277: then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2278: Assume: sizes should be long enough to hold all the values.
2279: */
2280: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2281: {
2282: PetscInt j = 0;
2284: PetscFunctionBegin;
2285: for (PetscInt i = 0; i < n; j++) {
2286: PetscInt row = idx[i];
2287: if (row % bs != 0) { /* Not the beginning of a block */
2288: sizes[j] = 1;
2289: i++;
2290: } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2291: sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
2292: i++;
2293: } else { /* Beginning of the block, so check if the complete block exists */
2294: PetscBool flg = PETSC_TRUE;
2295: for (PetscInt k = 1; k < bs; k++) {
2296: if (row + k != idx[i + k]) { /* break in the block */
2297: flg = PETSC_FALSE;
2298: break;
2299: }
2300: }
2301: if (flg) { /* No break in the bs */
2302: sizes[j] = bs;
2303: i += bs;
2304: } else {
2305: sizes[j] = 1;
2306: i++;
2307: }
2308: }
2309: }
2310: *bs_max = j;
2311: PetscFunctionReturn(PETSC_SUCCESS);
2312: }
2314: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2315: {
2316: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2317: PetscInt i, j, k, count, *rows;
2318: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2319: PetscScalar zero = 0.0;
2320: MatScalar *aa;
2321: const PetscScalar *xx;
2322: PetscScalar *bb;
2324: PetscFunctionBegin;
2325: /* fix right-hand side if needed */
2326: if (x && b) {
2327: PetscCall(VecGetArrayRead(x, &xx));
2328: PetscCall(VecGetArray(b, &bb));
2329: for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2330: PetscCall(VecRestoreArrayRead(x, &xx));
2331: PetscCall(VecRestoreArray(b, &bb));
2332: }
2334: /* Make a copy of the IS and sort it */
2335: /* allocate memory for rows,sizes */
2336: PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2338: /* copy IS values to rows, and sort them */
2339: for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2340: PetscCall(PetscSortInt(is_n, rows));
2342: if (baij->keepnonzeropattern) {
2343: for (i = 0; i < is_n; i++) sizes[i] = 1;
2344: bs_max = is_n;
2345: } else {
2346: PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2347: A->nonzerostate++;
2348: }
2350: for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2351: row = rows[j];
2352: PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2353: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2354: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2355: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2356: if (diag != (PetscScalar)0.0) {
2357: if (baij->ilen[row / bs] > 0) {
2358: baij->ilen[row / bs] = 1;
2359: baij->j[baij->i[row / bs]] = row / bs;
2361: PetscCall(PetscArrayzero(aa, count * bs));
2362: }
2363: /* Now insert all the diagonal values for this bs */
2364: for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2365: } else { /* (diag == 0.0) */
2366: baij->ilen[row / bs] = 0;
2367: } /* end (diag == 0.0) */
2368: } else { /* (sizes[i] != bs) */
2369: PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2370: for (k = 0; k < count; k++) {
2371: aa[0] = zero;
2372: aa += bs;
2373: }
2374: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2375: }
2376: }
2378: PetscCall(PetscFree2(rows, sizes));
2379: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2380: PetscFunctionReturn(PETSC_SUCCESS);
2381: }
2383: static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2384: {
2385: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2386: PetscInt i, j, k, count;
2387: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2388: PetscScalar zero = 0.0;
2389: MatScalar *aa;
2390: const PetscScalar *xx;
2391: PetscScalar *bb;
2392: PetscBool *zeroed, vecs = PETSC_FALSE;
2394: PetscFunctionBegin;
2395: /* fix right-hand side if needed */
2396: if (x && b) {
2397: PetscCall(VecGetArrayRead(x, &xx));
2398: PetscCall(VecGetArray(b, &bb));
2399: vecs = PETSC_TRUE;
2400: }
2402: /* zero the columns */
2403: PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2404: for (i = 0; i < is_n; i++) {
2405: PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2406: zeroed[is_idx[i]] = PETSC_TRUE;
2407: }
2408: for (i = 0; i < A->rmap->N; i++) {
2409: if (!zeroed[i]) {
2410: row = i / bs;
2411: for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2412: for (k = 0; k < bs; k++) {
2413: col = bs * baij->j[j] + k;
2414: if (zeroed[col]) {
2415: aa = baij->a + j * bs2 + (i % bs) + bs * k;
2416: if (vecs) bb[i] -= aa[0] * xx[col];
2417: aa[0] = 0.0;
2418: }
2419: }
2420: }
2421: } else if (vecs) bb[i] = diag * xx[i];
2422: }
2423: PetscCall(PetscFree(zeroed));
2424: if (vecs) {
2425: PetscCall(VecRestoreArrayRead(x, &xx));
2426: PetscCall(VecRestoreArray(b, &bb));
2427: }
2429: /* zero the rows */
2430: for (i = 0; i < is_n; i++) {
2431: row = is_idx[i];
2432: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2433: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2434: for (k = 0; k < count; k++) {
2435: aa[0] = zero;
2436: aa += bs;
2437: }
2438: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2439: }
2440: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2441: PetscFunctionReturn(PETSC_SUCCESS);
2442: }
2444: PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2445: {
2446: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2447: PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2448: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2449: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2450: PetscInt ridx, cidx, bs2 = a->bs2;
2451: PetscBool roworiented = a->roworiented;
2452: MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap;
2454: PetscFunctionBegin;
2455: for (k = 0; k < m; k++) { /* loop over added rows */
2456: row = im[k];
2457: brow = row / bs;
2458: if (row < 0) continue;
2459: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2460: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2461: if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2462: rmax = imax[brow];
2463: nrow = ailen[brow];
2464: low = 0;
2465: high = nrow;
2466: for (l = 0; l < n; l++) { /* loop over added columns */
2467: if (in[l] < 0) continue;
2468: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2469: col = in[l];
2470: bcol = col / bs;
2471: ridx = row % bs;
2472: cidx = col % bs;
2473: if (!A->structure_only) {
2474: if (roworiented) {
2475: value = v[l + k * n];
2476: } else {
2477: value = v[k + l * m];
2478: }
2479: }
2480: if (col <= lastcol) low = 0;
2481: else high = nrow;
2482: lastcol = col;
2483: while (high - low > 7) {
2484: t = (low + high) / 2;
2485: if (rp[t] > bcol) high = t;
2486: else low = t;
2487: }
2488: for (i = low; i < high; i++) {
2489: if (rp[i] > bcol) break;
2490: if (rp[i] == bcol) {
2491: bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2492: if (!A->structure_only) {
2493: if (is == ADD_VALUES) *bap += value;
2494: else *bap = value;
2495: }
2496: goto noinsert1;
2497: }
2498: }
2499: if (nonew == 1) goto noinsert1;
2500: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2501: if (A->structure_only) {
2502: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2503: } else {
2504: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2505: }
2506: N = nrow++ - 1;
2507: high++;
2508: /* shift up all the later entries in this row */
2509: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2510: rp[i] = bcol;
2511: if (!A->structure_only) {
2512: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2513: PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2514: ap[bs2 * i + bs * cidx + ridx] = value;
2515: }
2516: a->nz++;
2517: noinsert1:;
2518: low = i;
2519: }
2520: ailen[brow] = nrow;
2521: }
2522: PetscFunctionReturn(PETSC_SUCCESS);
2523: }
2525: static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2526: {
2527: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2528: Mat outA;
2529: PetscBool row_identity, col_identity;
2531: PetscFunctionBegin;
2532: PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2533: PetscCall(ISIdentity(row, &row_identity));
2534: PetscCall(ISIdentity(col, &col_identity));
2535: PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2537: outA = inA;
2538: inA->factortype = MAT_FACTOR_LU;
2539: PetscCall(PetscFree(inA->solvertype));
2540: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2542: PetscCall(PetscObjectReference((PetscObject)row));
2543: PetscCall(ISDestroy(&a->row));
2544: a->row = row;
2545: PetscCall(PetscObjectReference((PetscObject)col));
2546: PetscCall(ISDestroy(&a->col));
2547: a->col = col;
2549: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2550: PetscCall(ISDestroy(&a->icol));
2551: PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2553: PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2554: if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2555: PetscCall(MatLUFactorNumeric(outA, inA, info));
2556: PetscFunctionReturn(PETSC_SUCCESS);
2557: }
2559: static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2560: {
2561: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2563: PetscFunctionBegin;
2564: baij->nz = baij->maxnz;
2565: PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2566: PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2567: PetscFunctionReturn(PETSC_SUCCESS);
2568: }
2570: /*@
2571: MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2573: Input Parameters:
2574: + mat - the `MATSEQBAIJ` matrix
2575: - indices - the block column indices
2577: Level: advanced
2579: Notes:
2580: This can be called if you have precomputed the nonzero structure of the
2581: matrix and want to provide it to the matrix object to improve the performance
2582: of the `MatSetValues()` operation.
2584: You MUST have set the correct numbers of nonzeros per row in the call to
2585: `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2587: MUST be called before any calls to `MatSetValues()`
2589: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2590: @*/
2591: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2592: {
2593: PetscFunctionBegin;
2595: PetscAssertPointer(indices, 2);
2596: PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2597: PetscFunctionReturn(PETSC_SUCCESS);
2598: }
2600: static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2601: {
2602: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2603: PetscInt i, j, n, row, bs, *ai, *aj, mbs;
2604: PetscReal atmp;
2605: PetscScalar *x, zero = 0.0;
2606: MatScalar *aa;
2607: PetscInt ncols, brow, krow, kcol;
2609: PetscFunctionBegin;
2610: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2611: bs = A->rmap->bs;
2612: aa = a->a;
2613: ai = a->i;
2614: aj = a->j;
2615: mbs = a->mbs;
2617: PetscCall(VecSet(v, zero));
2618: PetscCall(VecGetArray(v, &x));
2619: PetscCall(VecGetLocalSize(v, &n));
2620: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2621: for (i = 0; i < mbs; i++) {
2622: ncols = ai[1] - ai[0];
2623: ai++;
2624: brow = bs * i;
2625: for (j = 0; j < ncols; j++) {
2626: for (kcol = 0; kcol < bs; kcol++) {
2627: for (krow = 0; krow < bs; krow++) {
2628: atmp = PetscAbsScalar(*aa);
2629: aa++;
2630: row = brow + krow; /* row index */
2631: if (PetscAbsScalar(x[row]) < atmp) {
2632: x[row] = atmp;
2633: if (idx) idx[row] = bs * (*aj) + kcol;
2634: }
2635: }
2636: }
2637: aj++;
2638: }
2639: }
2640: PetscCall(VecRestoreArray(v, &x));
2641: PetscFunctionReturn(PETSC_SUCCESS);
2642: }
2644: static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2645: {
2646: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2647: PetscInt i, j, n, row, bs, *ai, mbs;
2648: PetscReal atmp;
2649: PetscScalar *x, zero = 0.0;
2650: MatScalar *aa;
2651: PetscInt ncols, brow, krow, kcol;
2653: PetscFunctionBegin;
2654: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2655: bs = A->rmap->bs;
2656: aa = a->a;
2657: ai = a->i;
2658: mbs = a->mbs;
2660: PetscCall(VecSet(v, zero));
2661: PetscCall(VecGetArrayWrite(v, &x));
2662: PetscCall(VecGetLocalSize(v, &n));
2663: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2664: for (i = 0; i < mbs; i++) {
2665: ncols = ai[1] - ai[0];
2666: ai++;
2667: brow = bs * i;
2668: for (j = 0; j < ncols; j++) {
2669: for (kcol = 0; kcol < bs; kcol++) {
2670: for (krow = 0; krow < bs; krow++) {
2671: atmp = PetscAbsScalar(*aa);
2672: aa++;
2673: row = brow + krow; /* row index */
2674: x[row] += atmp;
2675: }
2676: }
2677: }
2678: }
2679: PetscCall(VecRestoreArrayWrite(v, &x));
2680: PetscFunctionReturn(PETSC_SUCCESS);
2681: }
2683: static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2684: {
2685: PetscFunctionBegin;
2686: /* If the two matrices have the same copy implementation, use fast copy. */
2687: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2688: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2689: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
2690: PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2692: PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2693: PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2694: PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2695: PetscCall(PetscObjectStateIncrease((PetscObject)B));
2696: } else {
2697: PetscCall(MatCopy_Basic(A, B, str));
2698: }
2699: PetscFunctionReturn(PETSC_SUCCESS);
2700: }
2702: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2703: {
2704: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2706: PetscFunctionBegin;
2707: *array = a->a;
2708: PetscFunctionReturn(PETSC_SUCCESS);
2709: }
2711: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2712: {
2713: PetscFunctionBegin;
2714: *array = NULL;
2715: PetscFunctionReturn(PETSC_SUCCESS);
2716: }
2718: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2719: {
2720: PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2721: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2722: Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2724: PetscFunctionBegin;
2725: /* Set the number of nonzeros in the new matrix */
2726: PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2727: PetscFunctionReturn(PETSC_SUCCESS);
2728: }
2730: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2731: {
2732: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2733: PetscInt bs = Y->rmap->bs, bs2 = bs * bs;
2734: PetscBLASInt one = 1;
2736: PetscFunctionBegin;
2737: if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2738: PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2739: if (e) {
2740: PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2741: if (e) {
2742: PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2743: if (e) str = SAME_NONZERO_PATTERN;
2744: }
2745: }
2746: if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2747: }
2748: if (str == SAME_NONZERO_PATTERN) {
2749: PetscScalar alpha = a;
2750: PetscBLASInt bnz;
2751: PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2752: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2753: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2754: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2755: PetscCall(MatAXPY_Basic(Y, a, X, str));
2756: } else {
2757: Mat B;
2758: PetscInt *nnz;
2759: PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2760: PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2761: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2762: PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2763: PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2764: PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2765: PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2766: PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2767: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2768: PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2769: PetscCall(MatHeaderMerge(Y, &B));
2770: PetscCall(PetscFree(nnz));
2771: }
2772: PetscFunctionReturn(PETSC_SUCCESS);
2773: }
2775: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2776: {
2777: #if PetscDefined(USE_COMPLEX)
2778: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2779: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2780: MatScalar *aa = a->a;
2782: PetscFunctionBegin;
2783: for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2784: PetscFunctionReturn(PETSC_SUCCESS);
2785: #else
2786: (void)A;
2787: return PETSC_SUCCESS;
2788: #endif
2789: }
2791: static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2792: {
2793: #if PetscDefined(USE_COMPLEX)
2794: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2795: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2796: MatScalar *aa = a->a;
2798: PetscFunctionBegin;
2799: for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2800: PetscFunctionReturn(PETSC_SUCCESS);
2801: #else
2802: (void)A;
2803: return PETSC_SUCCESS;
2804: #endif
2805: }
2807: static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2808: {
2809: #if PetscDefined(USE_COMPLEX)
2810: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2811: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2812: MatScalar *aa = a->a;
2814: PetscFunctionBegin;
2815: for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2816: PetscFunctionReturn(PETSC_SUCCESS);
2817: #else
2818: (void)A;
2819: return PETSC_SUCCESS;
2820: #endif
2821: }
2823: /*
2824: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2825: */
2826: static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2827: {
2828: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2829: PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2830: PetscInt nz = a->i[m], row, *jj, mr, col;
2832: PetscFunctionBegin;
2833: *nn = n;
2834: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2835: PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2836: PetscCall(PetscCalloc1(n, &collengths));
2837: PetscCall(PetscMalloc1(n + 1, &cia));
2838: PetscCall(PetscMalloc1(nz, &cja));
2839: jj = a->j;
2840: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2841: cia[0] = oshift;
2842: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2843: PetscCall(PetscArrayzero(collengths, n));
2844: jj = a->j;
2845: for (row = 0; row < m; row++) {
2846: mr = a->i[row + 1] - a->i[row];
2847: for (i = 0; i < mr; i++) {
2848: col = *jj++;
2850: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2851: }
2852: }
2853: PetscCall(PetscFree(collengths));
2854: *ia = cia;
2855: *ja = cja;
2856: PetscFunctionReturn(PETSC_SUCCESS);
2857: }
2859: static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2860: {
2861: PetscFunctionBegin;
2862: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2863: PetscCall(PetscFree(*ia));
2864: PetscCall(PetscFree(*ja));
2865: PetscFunctionReturn(PETSC_SUCCESS);
2866: }
2868: /*
2869: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2870: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2871: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2872: */
2873: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2874: {
2875: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2876: PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2877: PetscInt nz = a->i[m], row, *jj, mr, col;
2878: PetscInt *cspidx;
2880: PetscFunctionBegin;
2881: *nn = n;
2882: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2884: PetscCall(PetscCalloc1(n, &collengths));
2885: PetscCall(PetscMalloc1(n + 1, &cia));
2886: PetscCall(PetscMalloc1(nz, &cja));
2887: PetscCall(PetscMalloc1(nz, &cspidx));
2888: jj = a->j;
2889: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2890: cia[0] = oshift;
2891: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2892: PetscCall(PetscArrayzero(collengths, n));
2893: jj = a->j;
2894: for (row = 0; row < m; row++) {
2895: mr = a->i[row + 1] - a->i[row];
2896: for (i = 0; i < mr; i++) {
2897: col = *jj++;
2898: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2899: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2900: }
2901: }
2902: PetscCall(PetscFree(collengths));
2903: *ia = cia;
2904: *ja = cja;
2905: *spidx = cspidx;
2906: PetscFunctionReturn(PETSC_SUCCESS);
2907: }
2909: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2910: {
2911: PetscFunctionBegin;
2912: PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2913: PetscCall(PetscFree(*spidx));
2914: PetscFunctionReturn(PETSC_SUCCESS);
2915: }
2917: static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2918: {
2919: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2921: PetscFunctionBegin;
2922: if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2923: PetscCall(MatShift_Basic(Y, a));
2924: PetscFunctionReturn(PETSC_SUCCESS);
2925: }
2927: PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2928: {
2929: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2930: PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2931: PetscInt m = A->rmap->N, *ailen = a->ilen;
2932: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2933: MatScalar *aa = a->a, *ap;
2934: PetscBool zero;
2936: PetscFunctionBegin;
2937: PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2938: if (m) rmax = ailen[0];
2939: for (i = 1; i <= mbs; i++) {
2940: for (k = ai[i - 1]; k < ai[i]; k++) {
2941: zero = PETSC_TRUE;
2942: ap = aa + bs2 * k;
2943: for (j = 0; j < bs2 && zero; j++) {
2944: if (ap[j] != 0.0) zero = PETSC_FALSE;
2945: }
2946: if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2947: else {
2948: if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2949: aj[k - fshift] = aj[k];
2950: PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2951: }
2952: }
2953: ai[i - 1] -= fshift_prev;
2954: fshift_prev = fshift;
2955: ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2956: a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2957: rmax = PetscMax(rmax, ailen[i - 1]);
2958: }
2959: if (fshift) {
2960: if (mbs) {
2961: ai[mbs] -= fshift;
2962: a->nz = ai[mbs];
2963: }
2964: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2965: A->nonzerostate++;
2966: A->info.nz_unneeded += (PetscReal)fshift;
2967: a->rmax = rmax;
2968: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2969: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2970: }
2971: PetscFunctionReturn(PETSC_SUCCESS);
2972: }
2974: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2975: MatGetRow_SeqBAIJ,
2976: MatRestoreRow_SeqBAIJ,
2977: MatMult_SeqBAIJ_N,
2978: /* 4*/ MatMultAdd_SeqBAIJ_N,
2979: MatMultTranspose_SeqBAIJ,
2980: MatMultTransposeAdd_SeqBAIJ,
2981: NULL,
2982: NULL,
2983: NULL,
2984: /* 10*/ NULL,
2985: MatLUFactor_SeqBAIJ,
2986: NULL,
2987: NULL,
2988: MatTranspose_SeqBAIJ,
2989: /* 15*/ MatGetInfo_SeqBAIJ,
2990: MatEqual_SeqBAIJ,
2991: MatGetDiagonal_SeqBAIJ,
2992: MatDiagonalScale_SeqBAIJ,
2993: MatNorm_SeqBAIJ,
2994: /* 20*/ NULL,
2995: MatAssemblyEnd_SeqBAIJ,
2996: MatSetOption_SeqBAIJ,
2997: MatZeroEntries_SeqBAIJ,
2998: /* 24*/ MatZeroRows_SeqBAIJ,
2999: NULL,
3000: NULL,
3001: NULL,
3002: NULL,
3003: /* 29*/ MatSetUp_Seq_Hash,
3004: NULL,
3005: NULL,
3006: NULL,
3007: NULL,
3008: /* 34*/ MatDuplicate_SeqBAIJ,
3009: NULL,
3010: NULL,
3011: MatILUFactor_SeqBAIJ,
3012: NULL,
3013: /* 39*/ MatAXPY_SeqBAIJ,
3014: MatCreateSubMatrices_SeqBAIJ,
3015: MatIncreaseOverlap_SeqBAIJ,
3016: MatGetValues_SeqBAIJ,
3017: MatCopy_SeqBAIJ,
3018: /* 44*/ NULL,
3019: MatScale_SeqBAIJ,
3020: MatShift_SeqBAIJ,
3021: NULL,
3022: MatZeroRowsColumns_SeqBAIJ,
3023: /* 49*/ NULL,
3024: MatGetRowIJ_SeqBAIJ,
3025: MatRestoreRowIJ_SeqBAIJ,
3026: MatGetColumnIJ_SeqBAIJ,
3027: MatRestoreColumnIJ_SeqBAIJ,
3028: /* 54*/ MatFDColoringCreate_SeqXAIJ,
3029: NULL,
3030: NULL,
3031: NULL,
3032: MatSetValuesBlocked_SeqBAIJ,
3033: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3034: MatDestroy_SeqBAIJ,
3035: MatView_SeqBAIJ,
3036: NULL,
3037: NULL,
3038: /* 64*/ NULL,
3039: NULL,
3040: NULL,
3041: NULL,
3042: MatGetRowMaxAbs_SeqBAIJ,
3043: /* 69*/ NULL,
3044: MatConvert_Basic,
3045: NULL,
3046: MatFDColoringApply_BAIJ,
3047: NULL,
3048: /* 74*/ NULL,
3049: NULL,
3050: NULL,
3051: NULL,
3052: MatLoad_SeqBAIJ,
3053: /* 79*/ NULL,
3054: NULL,
3055: NULL,
3056: NULL,
3057: NULL,
3058: /* 84*/ NULL,
3059: NULL,
3060: NULL,
3061: NULL,
3062: NULL,
3063: /* 89*/ NULL,
3064: NULL,
3065: NULL,
3066: NULL,
3067: MatConjugate_SeqBAIJ,
3068: /* 94*/ NULL,
3069: NULL,
3070: MatRealPart_SeqBAIJ,
3071: MatImaginaryPart_SeqBAIJ,
3072: NULL,
3073: /* 99*/ NULL,
3074: NULL,
3075: NULL,
3076: NULL,
3077: NULL,
3078: /*104*/ NULL,
3079: NULL,
3080: NULL,
3081: NULL,
3082: NULL,
3083: /*109*/ NULL,
3084: NULL,
3085: MatMultHermitianTranspose_SeqBAIJ,
3086: MatMultHermitianTransposeAdd_SeqBAIJ,
3087: NULL,
3088: /*114*/ NULL,
3089: MatGetColumnReductions_SeqBAIJ,
3090: MatInvertBlockDiagonal_SeqBAIJ,
3091: NULL,
3092: NULL,
3093: /*119*/ NULL,
3094: NULL,
3095: NULL,
3096: NULL,
3097: NULL,
3098: /*124*/ NULL,
3099: NULL,
3100: MatSetBlockSizes_Default,
3101: NULL,
3102: MatFDColoringSetUp_SeqXAIJ,
3103: /*129*/ NULL,
3104: MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3105: MatDestroySubMatrices_SeqBAIJ,
3106: NULL,
3107: NULL,
3108: /*134*/ NULL,
3109: NULL,
3110: MatEliminateZeros_SeqBAIJ,
3111: MatGetRowSumAbs_SeqBAIJ,
3112: NULL,
3113: /*139*/ NULL,
3114: NULL,
3115: MatCopyHashToXAIJ_Seq_Hash,
3116: NULL,
3117: NULL};
3119: static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3120: {
3121: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3122: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3124: PetscFunctionBegin;
3125: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3127: /* allocate space for values if not already there */
3128: if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3130: /* copy values over */
3131: PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3132: PetscFunctionReturn(PETSC_SUCCESS);
3133: }
3135: static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3136: {
3137: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3138: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3140: PetscFunctionBegin;
3141: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3142: PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3144: /* copy values over */
3145: PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3146: PetscFunctionReturn(PETSC_SUCCESS);
3147: }
3149: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3150: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3152: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3153: {
3154: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3155: PetscInt i, mbs, nbs, bs2;
3156: PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3158: PetscFunctionBegin;
3159: if (B->hash_active) {
3160: PetscInt bs;
3161: B->ops[0] = b->cops;
3162: PetscCall(PetscHMapIJVDestroy(&b->ht));
3163: PetscCall(MatGetBlockSize(B, &bs));
3164: if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3165: PetscCall(PetscFree(b->dnz));
3166: PetscCall(PetscFree(b->bdnz));
3167: B->hash_active = PETSC_FALSE;
3168: }
3169: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3170: if (nz == MAT_SKIP_ALLOCATION) {
3171: skipallocation = PETSC_TRUE;
3172: nz = 0;
3173: }
3175: PetscCall(MatSetBlockSize(B, bs));
3176: PetscCall(PetscLayoutSetUp(B->rmap));
3177: PetscCall(PetscLayoutSetUp(B->cmap));
3178: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3180: B->preallocated = PETSC_TRUE;
3182: mbs = B->rmap->n / bs;
3183: nbs = B->cmap->n / bs;
3184: bs2 = bs * bs;
3186: PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3188: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3189: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3190: if (nnz) {
3191: for (i = 0; i < mbs; i++) {
3192: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3193: PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3194: }
3195: }
3197: PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3198: PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3199: PetscOptionsEnd();
3201: if (!flg) {
3202: switch (bs) {
3203: case 1:
3204: B->ops->mult = MatMult_SeqBAIJ_1;
3205: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3206: break;
3207: case 2:
3208: B->ops->mult = MatMult_SeqBAIJ_2;
3209: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3210: break;
3211: case 3:
3212: B->ops->mult = MatMult_SeqBAIJ_3;
3213: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3214: break;
3215: case 4:
3216: B->ops->mult = MatMult_SeqBAIJ_4;
3217: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3218: break;
3219: case 5:
3220: B->ops->mult = MatMult_SeqBAIJ_5;
3221: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3222: break;
3223: case 6:
3224: B->ops->mult = MatMult_SeqBAIJ_6;
3225: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3226: break;
3227: case 7:
3228: B->ops->mult = MatMult_SeqBAIJ_7;
3229: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3230: break;
3231: case 9: {
3232: PetscInt version = 1;
3233: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3234: switch (version) {
3235: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3236: case 1:
3237: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
3238: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3239: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3240: break;
3241: #endif
3242: default:
3243: B->ops->mult = MatMult_SeqBAIJ_N;
3244: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3245: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3246: break;
3247: }
3248: break;
3249: }
3250: case 11:
3251: B->ops->mult = MatMult_SeqBAIJ_11;
3252: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3253: break;
3254: case 12: {
3255: PetscInt version = 1;
3256: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3257: switch (version) {
3258: case 1:
3259: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
3260: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3261: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3262: break;
3263: case 2:
3264: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
3265: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3266: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3267: break;
3268: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3269: case 3:
3270: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
3271: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3272: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3273: break;
3274: #endif
3275: default:
3276: B->ops->mult = MatMult_SeqBAIJ_N;
3277: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3278: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3279: break;
3280: }
3281: break;
3282: }
3283: case 15: {
3284: PetscInt version = 1;
3285: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3286: switch (version) {
3287: case 1:
3288: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3289: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3290: break;
3291: case 2:
3292: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3293: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3294: break;
3295: case 3:
3296: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3297: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3298: break;
3299: case 4:
3300: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3301: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3302: break;
3303: default:
3304: B->ops->mult = MatMult_SeqBAIJ_N;
3305: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3306: break;
3307: }
3308: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3309: break;
3310: }
3311: default:
3312: B->ops->mult = MatMult_SeqBAIJ_N;
3313: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3314: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3315: break;
3316: }
3317: }
3318: B->ops->sor = MatSOR_SeqBAIJ;
3319: b->mbs = mbs;
3320: b->nbs = nbs;
3321: if (!skipallocation) {
3322: if (!b->imax) {
3323: PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3325: b->free_imax_ilen = PETSC_TRUE;
3326: }
3327: /* b->ilen will count nonzeros in each block row so far. */
3328: for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3329: if (!nnz) {
3330: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3331: else if (nz < 0) nz = 1;
3332: nz = PetscMin(nz, nbs);
3333: for (i = 0; i < mbs; i++) b->imax[i] = nz;
3334: PetscCall(PetscIntMultError(nz, mbs, &nz));
3335: } else {
3336: PetscInt64 nz64 = 0;
3337: for (i = 0; i < mbs; i++) {
3338: b->imax[i] = nnz[i];
3339: nz64 += nnz[i];
3340: }
3341: PetscCall(PetscIntCast(nz64, &nz));
3342: }
3344: /* allocate the matrix space */
3345: PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3346: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3347: PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3348: if (B->structure_only) {
3349: b->free_a = PETSC_FALSE;
3350: } else {
3351: PetscInt nzbs2 = 0;
3352: PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3353: PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3354: b->free_a = PETSC_TRUE;
3355: PetscCall(PetscArrayzero(b->a, nzbs2));
3356: }
3357: b->free_ij = PETSC_TRUE;
3358: PetscCall(PetscArrayzero(b->j, nz));
3360: b->i[0] = 0;
3361: for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3362: } else {
3363: b->free_a = PETSC_FALSE;
3364: b->free_ij = PETSC_FALSE;
3365: }
3367: b->bs2 = bs2;
3368: b->mbs = mbs;
3369: b->nz = 0;
3370: b->maxnz = nz;
3371: B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3372: B->was_assembled = PETSC_FALSE;
3373: B->assembled = PETSC_FALSE;
3374: if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3375: PetscFunctionReturn(PETSC_SUCCESS);
3376: }
3378: static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3379: {
3380: PetscInt i, m, nz, nz_max = 0, *nnz;
3381: PetscScalar *values = NULL;
3382: PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3384: PetscFunctionBegin;
3385: PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3386: PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3387: PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3388: PetscCall(PetscLayoutSetUp(B->rmap));
3389: PetscCall(PetscLayoutSetUp(B->cmap));
3390: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3391: m = B->rmap->n / bs;
3393: PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3394: PetscCall(PetscMalloc1(m + 1, &nnz));
3395: for (i = 0; i < m; i++) {
3396: nz = ii[i + 1] - ii[i];
3397: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3398: nz_max = PetscMax(nz_max, nz);
3399: nnz[i] = nz;
3400: }
3401: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3402: PetscCall(PetscFree(nnz));
3404: values = (PetscScalar *)V;
3405: if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3406: for (i = 0; i < m; i++) {
3407: PetscInt ncols = ii[i + 1] - ii[i];
3408: const PetscInt *icols = jj + ii[i];
3409: if (bs == 1 || !roworiented) {
3410: const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3411: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3412: } else {
3413: PetscInt j;
3414: for (j = 0; j < ncols; j++) {
3415: const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3416: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3417: }
3418: }
3419: }
3420: if (!V) PetscCall(PetscFree(values));
3421: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3422: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3423: PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3424: PetscFunctionReturn(PETSC_SUCCESS);
3425: }
3427: /*@C
3428: MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3430: Not Collective
3432: Input Parameter:
3433: . A - a `MATSEQBAIJ` matrix
3435: Output Parameter:
3436: . array - pointer to the data
3438: Level: intermediate
3440: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3441: @*/
3442: PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3443: {
3444: PetscFunctionBegin;
3445: PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3446: PetscFunctionReturn(PETSC_SUCCESS);
3447: }
3449: /*@C
3450: MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3452: Not Collective
3454: Input Parameters:
3455: + A - a `MATSEQBAIJ` matrix
3456: - array - pointer to the data
3458: Level: intermediate
3460: .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3461: @*/
3462: PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3463: {
3464: PetscFunctionBegin;
3465: PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3466: PetscFunctionReturn(PETSC_SUCCESS);
3467: }
3469: /*MC
3470: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3471: block sparse compressed row format.
3473: Options Database Keys:
3474: + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3475: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3477: Level: beginner
3479: Notes:
3480: `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3481: space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3483: Run with `-info` to see what version of the matrix-vector product is being used
3485: .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3486: M*/
3488: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3490: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3491: {
3492: PetscMPIInt size;
3493: Mat_SeqBAIJ *b;
3495: PetscFunctionBegin;
3496: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3497: PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3499: PetscCall(PetscNew(&b));
3500: B->data = (void *)b;
3501: B->ops[0] = MatOps_Values;
3503: b->row = NULL;
3504: b->col = NULL;
3505: b->icol = NULL;
3506: b->reallocs = 0;
3507: b->saved_values = NULL;
3509: b->roworiented = PETSC_TRUE;
3510: b->nonew = 0;
3511: b->diag = NULL;
3512: B->spptr = NULL;
3513: B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2;
3514: b->keepnonzeropattern = PETSC_FALSE;
3516: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3517: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3518: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3519: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3520: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3521: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3522: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3523: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3524: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3525: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3526: #if defined(PETSC_HAVE_HYPRE)
3527: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3528: #endif
3529: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3530: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3531: PetscFunctionReturn(PETSC_SUCCESS);
3532: }
3534: PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3535: {
3536: Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3537: PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3539: PetscFunctionBegin;
3540: PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3541: PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3543: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3544: c->imax = a->imax;
3545: c->ilen = a->ilen;
3546: c->free_imax_ilen = PETSC_FALSE;
3547: } else {
3548: PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3549: for (i = 0; i < mbs; i++) {
3550: c->imax[i] = a->imax[i];
3551: c->ilen[i] = a->ilen[i];
3552: }
3553: c->free_imax_ilen = PETSC_TRUE;
3554: }
3556: /* allocate the matrix space */
3557: if (mallocmatspace) {
3558: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3559: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3560: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3561: c->free_a = PETSC_TRUE;
3562: c->i = a->i;
3563: c->j = a->j;
3564: c->free_ij = PETSC_FALSE;
3565: c->parent = A;
3566: C->preallocated = PETSC_TRUE;
3567: C->assembled = PETSC_TRUE;
3569: PetscCall(PetscObjectReference((PetscObject)A));
3570: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3571: PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3572: } else {
3573: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3574: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3575: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3576: c->free_a = PETSC_TRUE;
3577: c->free_ij = PETSC_TRUE;
3579: PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3580: if (mbs > 0) {
3581: PetscCall(PetscArraycpy(c->j, a->j, nz));
3582: if (cpvalues == MAT_COPY_VALUES) {
3583: PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3584: } else {
3585: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3586: }
3587: }
3588: C->preallocated = PETSC_TRUE;
3589: C->assembled = PETSC_TRUE;
3590: }
3591: }
3593: c->roworiented = a->roworiented;
3594: c->nonew = a->nonew;
3596: PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3597: PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3599: c->bs2 = a->bs2;
3600: c->mbs = a->mbs;
3601: c->nbs = a->nbs;
3602: c->nz = a->nz;
3603: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3604: c->solve_work = NULL;
3605: c->mult_work = NULL;
3606: c->sor_workt = NULL;
3607: c->sor_work = NULL;
3609: c->compressedrow.use = a->compressedrow.use;
3610: c->compressedrow.nrows = a->compressedrow.nrows;
3611: if (a->compressedrow.use) {
3612: i = a->compressedrow.nrows;
3613: PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3614: PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3615: PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3616: } else {
3617: c->compressedrow.use = PETSC_FALSE;
3618: c->compressedrow.i = NULL;
3619: c->compressedrow.rindex = NULL;
3620: }
3621: c->nonzerorowcnt = a->nonzerorowcnt;
3622: C->nonzerostate = A->nonzerostate;
3624: PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3625: PetscFunctionReturn(PETSC_SUCCESS);
3626: }
3628: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3629: {
3630: PetscFunctionBegin;
3631: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3632: PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3633: PetscCall(MatSetType(*B, MATSEQBAIJ));
3634: PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3635: PetscFunctionReturn(PETSC_SUCCESS);
3636: }
3638: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3639: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3640: {
3641: PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3642: PetscInt *rowidxs, *colidxs;
3643: PetscScalar *matvals;
3645: PetscFunctionBegin;
3646: PetscCall(PetscViewerSetUp(viewer));
3648: /* read matrix header */
3649: PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3650: PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3651: M = header[1];
3652: N = header[2];
3653: nz = header[3];
3654: PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3655: PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3656: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3658: /* set block sizes from the viewer's .info file */
3659: PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3660: /* set local and global sizes if not set already */
3661: if (mat->rmap->n < 0) mat->rmap->n = M;
3662: if (mat->cmap->n < 0) mat->cmap->n = N;
3663: if (mat->rmap->N < 0) mat->rmap->N = M;
3664: if (mat->cmap->N < 0) mat->cmap->N = N;
3665: PetscCall(PetscLayoutSetUp(mat->rmap));
3666: PetscCall(PetscLayoutSetUp(mat->cmap));
3668: /* check if the matrix sizes are correct */
3669: PetscCall(MatGetSize(mat, &rows, &cols));
3670: PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3671: PetscCall(MatGetBlockSize(mat, &bs));
3672: PetscCall(MatGetLocalSize(mat, &m, &n));
3673: mbs = m / bs;
3674: nbs = n / bs;
3676: /* read in row lengths, column indices and nonzero values */
3677: PetscCall(PetscMalloc1(m + 1, &rowidxs));
3678: PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3679: rowidxs[0] = 0;
3680: for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3681: sum = rowidxs[m];
3682: PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3684: /* read in column indices and nonzero values */
3685: PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3686: PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3687: PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3689: { /* preallocate matrix storage */
3690: PetscBT bt; /* helper bit set to count nonzeros */
3691: PetscInt *nnz;
3692: PetscBool sbaij;
3694: PetscCall(PetscBTCreate(nbs, &bt));
3695: PetscCall(PetscCalloc1(mbs, &nnz));
3696: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3697: for (i = 0; i < mbs; i++) {
3698: PetscCall(PetscBTMemzero(nbs, bt));
3699: for (k = 0; k < bs; k++) {
3700: PetscInt row = bs * i + k;
3701: for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3702: PetscInt col = colidxs[j];
3703: if (!sbaij || col >= row)
3704: if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3705: }
3706: }
3707: }
3708: PetscCall(PetscBTDestroy(&bt));
3709: PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3710: PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3711: PetscCall(PetscFree(nnz));
3712: }
3714: /* store matrix values */
3715: for (i = 0; i < m; i++) {
3716: PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3717: PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3718: }
3720: PetscCall(PetscFree(rowidxs));
3721: PetscCall(PetscFree2(colidxs, matvals));
3722: PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3723: PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3724: PetscFunctionReturn(PETSC_SUCCESS);
3725: }
3727: PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3728: {
3729: PetscBool isbinary;
3731: PetscFunctionBegin;
3732: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3733: PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3734: PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3735: PetscFunctionReturn(PETSC_SUCCESS);
3736: }
3738: /*@
3739: MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3740: compressed row) format. For good matrix assembly performance the
3741: user should preallocate the matrix storage by setting the parameter `nz`
3742: (or the array `nnz`).
3744: Collective
3746: Input Parameters:
3747: + comm - MPI communicator, set to `PETSC_COMM_SELF`
3748: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3749: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3750: . m - number of rows
3751: . n - number of columns
3752: . nz - number of nonzero blocks per block row (same for all rows)
3753: - nnz - array containing the number of nonzero blocks in the various block rows
3754: (possibly different for each block row) or `NULL`
3756: Output Parameter:
3757: . A - the matrix
3759: Options Database Keys:
3760: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3761: - -mat_block_size - size of the blocks to use
3763: Level: intermediate
3765: Notes:
3766: It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3767: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3768: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3770: The number of rows and columns must be divisible by blocksize.
3772: If the `nnz` parameter is given then the `nz` parameter is ignored
3774: A nonzero block is any block that as 1 or more nonzeros in it
3776: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3777: storage. That is, the stored row and column indices can begin at
3778: either one (as in Fortran) or zero.
3780: Specify the preallocated storage with either `nz` or `nnz` (not both).
3781: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3782: allocation. See [Sparse Matrices](sec_matsparse) for details.
3783: matrices.
3785: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3786: @*/
3787: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3788: {
3789: PetscFunctionBegin;
3790: PetscCall(MatCreate(comm, A));
3791: PetscCall(MatSetSizes(*A, m, n, m, n));
3792: PetscCall(MatSetType(*A, MATSEQBAIJ));
3793: PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3794: PetscFunctionReturn(PETSC_SUCCESS);
3795: }
3797: /*@
3798: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3799: per row in the matrix. For good matrix assembly performance the
3800: user should preallocate the matrix storage by setting the parameter `nz`
3801: (or the array `nnz`).
3803: Collective
3805: Input Parameters:
3806: + B - the matrix
3807: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3808: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3809: . nz - number of block nonzeros per block row (same for all rows)
3810: - nnz - array containing the number of block nonzeros in the various block rows
3811: (possibly different for each block row) or `NULL`
3813: Options Database Keys:
3814: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3815: - -mat_block_size - size of the blocks to use
3817: Level: intermediate
3819: Notes:
3820: If the `nnz` parameter is given then the `nz` parameter is ignored
3822: You can call `MatGetInfo()` to get information on how effective the preallocation was;
3823: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3824: You can also run with the option `-info` and look for messages with the string
3825: malloc in them to see if additional memory allocation was needed.
3827: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3828: storage. That is, the stored row and column indices can begin at
3829: either one (as in Fortran) or zero.
3831: Specify the preallocated storage with either `nz` or `nnz` (not both).
3832: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3833: allocation. See [Sparse Matrices](sec_matsparse) for details.
3835: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3836: @*/
3837: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3838: {
3839: PetscFunctionBegin;
3843: PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3844: PetscFunctionReturn(PETSC_SUCCESS);
3845: }
3847: /*@C
3848: MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3850: Collective
3852: Input Parameters:
3853: + B - the matrix
3854: . bs - the blocksize
3855: . i - the indices into `j` for the start of each local row (indices start with zero)
3856: . j - the column indices for each local row (indices start with zero) these must be sorted for each row
3857: - v - optional values in the matrix, use `NULL` if not provided
3859: Level: advanced
3861: Notes:
3862: The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3864: The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
3865: may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3866: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3867: `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3868: block column and the second index is over columns within a block.
3870: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3872: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3873: @*/
3874: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3875: {
3876: PetscFunctionBegin;
3880: PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3881: PetscFunctionReturn(PETSC_SUCCESS);
3882: }
3884: /*@
3885: MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3887: Collective
3889: Input Parameters:
3890: + comm - must be an MPI communicator of size 1
3891: . bs - size of block
3892: . m - number of rows
3893: . n - number of columns
3894: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3895: . j - column indices
3896: - a - matrix values
3898: Output Parameter:
3899: . mat - the matrix
3901: Level: advanced
3903: Notes:
3904: The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3905: once the matrix is destroyed
3907: You cannot set new nonzero locations into this matrix, that will generate an error.
3909: The `i` and `j` indices are 0 based
3911: When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3913: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3914: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3915: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3916: with column-major ordering within blocks.
3918: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3919: @*/
3920: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3921: {
3922: Mat_SeqBAIJ *baij;
3924: PetscFunctionBegin;
3925: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3926: if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3928: PetscCall(MatCreate(comm, mat));
3929: PetscCall(MatSetSizes(*mat, m, n, m, n));
3930: PetscCall(MatSetType(*mat, MATSEQBAIJ));
3931: PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3932: baij = (Mat_SeqBAIJ *)(*mat)->data;
3933: PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3935: baij->i = i;
3936: baij->j = j;
3937: baij->a = a;
3939: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3940: baij->free_a = PETSC_FALSE;
3941: baij->free_ij = PETSC_FALSE;
3942: baij->free_imax_ilen = PETSC_TRUE;
3944: for (PetscInt ii = 0; ii < m; ii++) {
3945: const PetscInt row_len = i[ii + 1] - i[ii];
3947: baij->ilen[ii] = baij->imax[ii] = row_len;
3948: PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3949: }
3950: if (PetscDefined(USE_DEBUG)) {
3951: for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3952: PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3953: PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3954: }
3955: }
3957: PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3958: PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3959: PetscFunctionReturn(PETSC_SUCCESS);
3960: }
3962: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3963: {
3964: PetscFunctionBegin;
3965: PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3966: PetscFunctionReturn(PETSC_SUCCESS);
3967: }