Actual source code: baij.c
1: /*
2: Defines the basic matrix operations for the BAIJ (compressed row)
3: matrix storage format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/kernels/blockinvert.h>
8: #include <petsc/private/kernels/blockmatmult.h>
10: /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11: #define TYPE BAIJ
12: #define TYPE_BS
13: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14: #undef TYPE_BS
15: #define TYPE_BS _BS
16: #define TYPE_BS_ON
17: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18: #undef TYPE_BS
19: #include "../src/mat/impls/aij/seq/seqhashmat.h"
20: #undef TYPE
21: #undef TYPE_BS_ON
23: #if defined(PETSC_HAVE_HYPRE)
24: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25: #endif
27: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29: #endif
30: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32: static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33: {
34: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35: PetscInt m, n, ib, jb, bs = A->rmap->bs;
36: MatScalar *a_val = a_aij->a;
38: PetscFunctionBegin;
39: PetscCall(MatGetSize(A, &m, &n));
40: PetscCall(PetscArrayzero(reductions, n));
41: if (type == NORM_2) {
42: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43: for (jb = 0; jb < bs; jb++) {
44: for (ib = 0; ib < bs; ib++) {
45: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46: a_val++;
47: }
48: }
49: }
50: } else if (type == NORM_1) {
51: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52: for (jb = 0; jb < bs; jb++) {
53: for (ib = 0; ib < bs; ib++) {
54: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55: a_val++;
56: }
57: }
58: }
59: } else if (type == NORM_INFINITY) {
60: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61: for (jb = 0; jb < bs; jb++) {
62: for (ib = 0; ib < bs; ib++) {
63: PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
64: reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65: a_val++;
66: }
67: }
68: }
69: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71: for (jb = 0; jb < bs; jb++) {
72: for (ib = 0; ib < bs; ib++) {
73: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74: a_val++;
75: }
76: }
77: }
78: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80: for (jb = 0; jb < bs; jb++) {
81: for (ib = 0; ib < bs; ib++) {
82: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83: a_val++;
84: }
85: }
86: }
87: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88: if (type == NORM_2) {
89: for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91: for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92: }
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97: {
98: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99: PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100: MatScalar *v = a->a, *odiag, *diag, work[25], *v_work;
101: PetscReal shift = 0.0;
102: PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
104: PetscFunctionBegin;
105: allowzeropivot = PetscNot(A->erroriffailure);
107: if (a->idiagvalid) {
108: if (values) *values = a->idiag;
109: PetscFunctionReturn(PETSC_SUCCESS);
110: }
111: PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112: diag_offset = a->diag;
113: if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
114: diag = a->idiag;
115: if (values) *values = a->idiag;
116: /* factor and invert each block */
117: switch (bs) {
118: case 1:
119: for (i = 0; i < mbs; i++) {
120: odiag = v + 1 * diag_offset[i];
121: diag[0] = odiag[0];
123: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124: if (allowzeropivot) {
125: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127: A->factorerror_zeropivot_row = i;
128: PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
130: }
132: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
133: diag += 1;
134: }
135: break;
136: case 2:
137: for (i = 0; i < mbs; i++) {
138: odiag = v + 4 * diag_offset[i];
139: diag[0] = odiag[0];
140: diag[1] = odiag[1];
141: diag[2] = odiag[2];
142: diag[3] = odiag[3];
143: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
144: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145: diag += 4;
146: }
147: break;
148: case 3:
149: for (i = 0; i < mbs; i++) {
150: odiag = v + 9 * diag_offset[i];
151: diag[0] = odiag[0];
152: diag[1] = odiag[1];
153: diag[2] = odiag[2];
154: diag[3] = odiag[3];
155: diag[4] = odiag[4];
156: diag[5] = odiag[5];
157: diag[6] = odiag[6];
158: diag[7] = odiag[7];
159: diag[8] = odiag[8];
160: PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
161: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
162: diag += 9;
163: }
164: break;
165: case 4:
166: for (i = 0; i < mbs; i++) {
167: odiag = v + 16 * diag_offset[i];
168: PetscCall(PetscArraycpy(diag, odiag, 16));
169: PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
170: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
171: diag += 16;
172: }
173: break;
174: case 5:
175: for (i = 0; i < mbs; i++) {
176: odiag = v + 25 * diag_offset[i];
177: PetscCall(PetscArraycpy(diag, odiag, 25));
178: PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
179: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
180: diag += 25;
181: }
182: break;
183: case 6:
184: for (i = 0; i < mbs; i++) {
185: odiag = v + 36 * diag_offset[i];
186: PetscCall(PetscArraycpy(diag, odiag, 36));
187: PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
188: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189: diag += 36;
190: }
191: break;
192: case 7:
193: for (i = 0; i < mbs; i++) {
194: odiag = v + 49 * diag_offset[i];
195: PetscCall(PetscArraycpy(diag, odiag, 49));
196: PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
197: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198: diag += 49;
199: }
200: break;
201: default:
202: PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
203: for (i = 0; i < mbs; i++) {
204: odiag = v + bs2 * diag_offset[i];
205: PetscCall(PetscArraycpy(diag, odiag, bs2));
206: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
207: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
208: diag += bs2;
209: }
210: PetscCall(PetscFree2(v_work, v_pivots));
211: }
212: a->idiagvalid = PETSC_TRUE;
213: PetscFunctionReturn(PETSC_SUCCESS);
214: }
216: static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
217: {
218: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
219: PetscScalar *x, *work, *w, *workt, *t;
220: const MatScalar *v, *aa = a->a, *idiag;
221: const PetscScalar *b, *xb;
222: PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
223: PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
224: const PetscInt *diag, *ai = a->i, *aj = a->j, *vi;
226: PetscFunctionBegin;
227: its = its * lits;
228: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
229: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
230: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
231: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
232: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234: if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
237: diag = a->diag;
238: idiag = a->idiag;
239: k = PetscMax(A->rmap->n, A->cmap->n);
240: if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
241: if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
242: if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
243: work = a->mult_work;
244: t = a->sor_workt;
245: w = a->sor_work;
247: PetscCall(VecGetArray(xx, &x));
248: PetscCall(VecGetArrayRead(bb, &b));
250: if (flag & SOR_ZERO_INITIAL_GUESS) {
251: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
252: switch (bs) {
253: case 1:
254: PetscKernel_v_gets_A_times_w_1(x, idiag, b);
255: t[0] = b[0];
256: i2 = 1;
257: idiag += 1;
258: for (i = 1; i < m; i++) {
259: v = aa + ai[i];
260: vi = aj + ai[i];
261: nz = diag[i] - ai[i];
262: s[0] = b[i2];
263: for (j = 0; j < nz; j++) {
264: xw[0] = x[vi[j]];
265: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
266: }
267: t[i2] = s[0];
268: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
269: x[i2] = xw[0];
270: idiag += 1;
271: i2 += 1;
272: }
273: break;
274: case 2:
275: PetscKernel_v_gets_A_times_w_2(x, idiag, b);
276: t[0] = b[0];
277: t[1] = b[1];
278: i2 = 2;
279: idiag += 4;
280: for (i = 1; i < m; i++) {
281: v = aa + 4 * ai[i];
282: vi = aj + ai[i];
283: nz = diag[i] - ai[i];
284: s[0] = b[i2];
285: s[1] = b[i2 + 1];
286: for (j = 0; j < nz; j++) {
287: idx = 2 * vi[j];
288: it = 4 * j;
289: xw[0] = x[idx];
290: xw[1] = x[1 + idx];
291: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
292: }
293: t[i2] = s[0];
294: t[i2 + 1] = s[1];
295: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
296: x[i2] = xw[0];
297: x[i2 + 1] = xw[1];
298: idiag += 4;
299: i2 += 2;
300: }
301: break;
302: case 3:
303: PetscKernel_v_gets_A_times_w_3(x, idiag, b);
304: t[0] = b[0];
305: t[1] = b[1];
306: t[2] = b[2];
307: i2 = 3;
308: idiag += 9;
309: for (i = 1; i < m; i++) {
310: v = aa + 9 * ai[i];
311: vi = aj + ai[i];
312: nz = diag[i] - ai[i];
313: s[0] = b[i2];
314: s[1] = b[i2 + 1];
315: s[2] = b[i2 + 2];
316: while (nz--) {
317: idx = 3 * (*vi++);
318: xw[0] = x[idx];
319: xw[1] = x[1 + idx];
320: xw[2] = x[2 + idx];
321: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
322: v += 9;
323: }
324: t[i2] = s[0];
325: t[i2 + 1] = s[1];
326: t[i2 + 2] = s[2];
327: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
328: x[i2] = xw[0];
329: x[i2 + 1] = xw[1];
330: x[i2 + 2] = xw[2];
331: idiag += 9;
332: i2 += 3;
333: }
334: break;
335: case 4:
336: PetscKernel_v_gets_A_times_w_4(x, idiag, b);
337: t[0] = b[0];
338: t[1] = b[1];
339: t[2] = b[2];
340: t[3] = b[3];
341: i2 = 4;
342: idiag += 16;
343: for (i = 1; i < m; i++) {
344: v = aa + 16 * ai[i];
345: vi = aj + ai[i];
346: nz = diag[i] - ai[i];
347: s[0] = b[i2];
348: s[1] = b[i2 + 1];
349: s[2] = b[i2 + 2];
350: s[3] = b[i2 + 3];
351: while (nz--) {
352: idx = 4 * (*vi++);
353: xw[0] = x[idx];
354: xw[1] = x[1 + idx];
355: xw[2] = x[2 + idx];
356: xw[3] = x[3 + idx];
357: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
358: v += 16;
359: }
360: t[i2] = s[0];
361: t[i2 + 1] = s[1];
362: t[i2 + 2] = s[2];
363: t[i2 + 3] = s[3];
364: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
365: x[i2] = xw[0];
366: x[i2 + 1] = xw[1];
367: x[i2 + 2] = xw[2];
368: x[i2 + 3] = xw[3];
369: idiag += 16;
370: i2 += 4;
371: }
372: break;
373: case 5:
374: PetscKernel_v_gets_A_times_w_5(x, idiag, b);
375: t[0] = b[0];
376: t[1] = b[1];
377: t[2] = b[2];
378: t[3] = b[3];
379: t[4] = b[4];
380: i2 = 5;
381: idiag += 25;
382: for (i = 1; i < m; i++) {
383: v = aa + 25 * ai[i];
384: vi = aj + ai[i];
385: nz = diag[i] - ai[i];
386: s[0] = b[i2];
387: s[1] = b[i2 + 1];
388: s[2] = b[i2 + 2];
389: s[3] = b[i2 + 3];
390: s[4] = b[i2 + 4];
391: while (nz--) {
392: idx = 5 * (*vi++);
393: xw[0] = x[idx];
394: xw[1] = x[1 + idx];
395: xw[2] = x[2 + idx];
396: xw[3] = x[3 + idx];
397: xw[4] = x[4 + idx];
398: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
399: v += 25;
400: }
401: t[i2] = s[0];
402: t[i2 + 1] = s[1];
403: t[i2 + 2] = s[2];
404: t[i2 + 3] = s[3];
405: t[i2 + 4] = s[4];
406: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
407: x[i2] = xw[0];
408: x[i2 + 1] = xw[1];
409: x[i2 + 2] = xw[2];
410: x[i2 + 3] = xw[3];
411: x[i2 + 4] = xw[4];
412: idiag += 25;
413: i2 += 5;
414: }
415: break;
416: case 6:
417: PetscKernel_v_gets_A_times_w_6(x, idiag, b);
418: t[0] = b[0];
419: t[1] = b[1];
420: t[2] = b[2];
421: t[3] = b[3];
422: t[4] = b[4];
423: t[5] = b[5];
424: i2 = 6;
425: idiag += 36;
426: for (i = 1; i < m; i++) {
427: v = aa + 36 * ai[i];
428: vi = aj + ai[i];
429: nz = diag[i] - ai[i];
430: s[0] = b[i2];
431: s[1] = b[i2 + 1];
432: s[2] = b[i2 + 2];
433: s[3] = b[i2 + 3];
434: s[4] = b[i2 + 4];
435: s[5] = b[i2 + 5];
436: while (nz--) {
437: idx = 6 * (*vi++);
438: xw[0] = x[idx];
439: xw[1] = x[1 + idx];
440: xw[2] = x[2 + idx];
441: xw[3] = x[3 + idx];
442: xw[4] = x[4 + idx];
443: xw[5] = x[5 + idx];
444: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
445: v += 36;
446: }
447: t[i2] = s[0];
448: t[i2 + 1] = s[1];
449: t[i2 + 2] = s[2];
450: t[i2 + 3] = s[3];
451: t[i2 + 4] = s[4];
452: t[i2 + 5] = s[5];
453: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
454: x[i2] = xw[0];
455: x[i2 + 1] = xw[1];
456: x[i2 + 2] = xw[2];
457: x[i2 + 3] = xw[3];
458: x[i2 + 4] = xw[4];
459: x[i2 + 5] = xw[5];
460: idiag += 36;
461: i2 += 6;
462: }
463: break;
464: case 7:
465: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
466: t[0] = b[0];
467: t[1] = b[1];
468: t[2] = b[2];
469: t[3] = b[3];
470: t[4] = b[4];
471: t[5] = b[5];
472: t[6] = b[6];
473: i2 = 7;
474: idiag += 49;
475: for (i = 1; i < m; i++) {
476: v = aa + 49 * ai[i];
477: vi = aj + ai[i];
478: nz = diag[i] - ai[i];
479: s[0] = b[i2];
480: s[1] = b[i2 + 1];
481: s[2] = b[i2 + 2];
482: s[3] = b[i2 + 3];
483: s[4] = b[i2 + 4];
484: s[5] = b[i2 + 5];
485: s[6] = b[i2 + 6];
486: while (nz--) {
487: idx = 7 * (*vi++);
488: xw[0] = x[idx];
489: xw[1] = x[1 + idx];
490: xw[2] = x[2 + idx];
491: xw[3] = x[3 + idx];
492: xw[4] = x[4 + idx];
493: xw[5] = x[5 + idx];
494: xw[6] = x[6 + idx];
495: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
496: v += 49;
497: }
498: t[i2] = s[0];
499: t[i2 + 1] = s[1];
500: t[i2 + 2] = s[2];
501: t[i2 + 3] = s[3];
502: t[i2 + 4] = s[4];
503: t[i2 + 5] = s[5];
504: t[i2 + 6] = s[6];
505: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
506: x[i2] = xw[0];
507: x[i2 + 1] = xw[1];
508: x[i2 + 2] = xw[2];
509: x[i2 + 3] = xw[3];
510: x[i2 + 4] = xw[4];
511: x[i2 + 5] = xw[5];
512: x[i2 + 6] = xw[6];
513: idiag += 49;
514: i2 += 7;
515: }
516: break;
517: default:
518: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
519: PetscCall(PetscArraycpy(t, b, bs));
520: i2 = bs;
521: idiag += bs2;
522: for (i = 1; i < m; i++) {
523: v = aa + bs2 * ai[i];
524: vi = aj + ai[i];
525: nz = diag[i] - ai[i];
527: PetscCall(PetscArraycpy(w, b + i2, bs));
528: /* copy all rows of x that are needed into contiguous space */
529: workt = work;
530: for (j = 0; j < nz; j++) {
531: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
532: workt += bs;
533: }
534: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
535: PetscCall(PetscArraycpy(t + i2, w, bs));
536: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538: idiag += bs2;
539: i2 += bs;
540: }
541: break;
542: }
543: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
544: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
545: xb = t;
546: } else xb = b;
547: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
548: idiag = a->idiag + bs2 * (a->mbs - 1);
549: i2 = bs * (m - 1);
550: switch (bs) {
551: case 1:
552: s[0] = xb[i2];
553: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554: x[i2] = xw[0];
555: i2 -= 1;
556: for (i = m - 2; i >= 0; i--) {
557: v = aa + (diag[i] + 1);
558: vi = aj + diag[i] + 1;
559: nz = ai[i + 1] - diag[i] - 1;
560: s[0] = xb[i2];
561: for (j = 0; j < nz; j++) {
562: xw[0] = x[vi[j]];
563: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
564: }
565: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
566: x[i2] = xw[0];
567: idiag -= 1;
568: i2 -= 1;
569: }
570: break;
571: case 2:
572: s[0] = xb[i2];
573: s[1] = xb[i2 + 1];
574: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
575: x[i2] = xw[0];
576: x[i2 + 1] = xw[1];
577: i2 -= 2;
578: idiag -= 4;
579: for (i = m - 2; i >= 0; i--) {
580: v = aa + 4 * (diag[i] + 1);
581: vi = aj + diag[i] + 1;
582: nz = ai[i + 1] - diag[i] - 1;
583: s[0] = xb[i2];
584: s[1] = xb[i2 + 1];
585: for (j = 0; j < nz; j++) {
586: idx = 2 * vi[j];
587: it = 4 * j;
588: xw[0] = x[idx];
589: xw[1] = x[1 + idx];
590: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
591: }
592: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
593: x[i2] = xw[0];
594: x[i2 + 1] = xw[1];
595: idiag -= 4;
596: i2 -= 2;
597: }
598: break;
599: case 3:
600: s[0] = xb[i2];
601: s[1] = xb[i2 + 1];
602: s[2] = xb[i2 + 2];
603: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
604: x[i2] = xw[0];
605: x[i2 + 1] = xw[1];
606: x[i2 + 2] = xw[2];
607: i2 -= 3;
608: idiag -= 9;
609: for (i = m - 2; i >= 0; i--) {
610: v = aa + 9 * (diag[i] + 1);
611: vi = aj + diag[i] + 1;
612: nz = ai[i + 1] - diag[i] - 1;
613: s[0] = xb[i2];
614: s[1] = xb[i2 + 1];
615: s[2] = xb[i2 + 2];
616: while (nz--) {
617: idx = 3 * (*vi++);
618: xw[0] = x[idx];
619: xw[1] = x[1 + idx];
620: xw[2] = x[2 + idx];
621: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
622: v += 9;
623: }
624: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
625: x[i2] = xw[0];
626: x[i2 + 1] = xw[1];
627: x[i2 + 2] = xw[2];
628: idiag -= 9;
629: i2 -= 3;
630: }
631: break;
632: case 4:
633: s[0] = xb[i2];
634: s[1] = xb[i2 + 1];
635: s[2] = xb[i2 + 2];
636: s[3] = xb[i2 + 3];
637: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
638: x[i2] = xw[0];
639: x[i2 + 1] = xw[1];
640: x[i2 + 2] = xw[2];
641: x[i2 + 3] = xw[3];
642: i2 -= 4;
643: idiag -= 16;
644: for (i = m - 2; i >= 0; i--) {
645: v = aa + 16 * (diag[i] + 1);
646: vi = aj + diag[i] + 1;
647: nz = ai[i + 1] - diag[i] - 1;
648: s[0] = xb[i2];
649: s[1] = xb[i2 + 1];
650: s[2] = xb[i2 + 2];
651: s[3] = xb[i2 + 3];
652: while (nz--) {
653: idx = 4 * (*vi++);
654: xw[0] = x[idx];
655: xw[1] = x[1 + idx];
656: xw[2] = x[2 + idx];
657: xw[3] = x[3 + idx];
658: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
659: v += 16;
660: }
661: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
662: x[i2] = xw[0];
663: x[i2 + 1] = xw[1];
664: x[i2 + 2] = xw[2];
665: x[i2 + 3] = xw[3];
666: idiag -= 16;
667: i2 -= 4;
668: }
669: break;
670: case 5:
671: s[0] = xb[i2];
672: s[1] = xb[i2 + 1];
673: s[2] = xb[i2 + 2];
674: s[3] = xb[i2 + 3];
675: s[4] = xb[i2 + 4];
676: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
677: x[i2] = xw[0];
678: x[i2 + 1] = xw[1];
679: x[i2 + 2] = xw[2];
680: x[i2 + 3] = xw[3];
681: x[i2 + 4] = xw[4];
682: i2 -= 5;
683: idiag -= 25;
684: for (i = m - 2; i >= 0; i--) {
685: v = aa + 25 * (diag[i] + 1);
686: vi = aj + diag[i] + 1;
687: nz = ai[i + 1] - diag[i] - 1;
688: s[0] = xb[i2];
689: s[1] = xb[i2 + 1];
690: s[2] = xb[i2 + 2];
691: s[3] = xb[i2 + 3];
692: s[4] = xb[i2 + 4];
693: while (nz--) {
694: idx = 5 * (*vi++);
695: xw[0] = x[idx];
696: xw[1] = x[1 + idx];
697: xw[2] = x[2 + idx];
698: xw[3] = x[3 + idx];
699: xw[4] = x[4 + idx];
700: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
701: v += 25;
702: }
703: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
704: x[i2] = xw[0];
705: x[i2 + 1] = xw[1];
706: x[i2 + 2] = xw[2];
707: x[i2 + 3] = xw[3];
708: x[i2 + 4] = xw[4];
709: idiag -= 25;
710: i2 -= 5;
711: }
712: break;
713: case 6:
714: s[0] = xb[i2];
715: s[1] = xb[i2 + 1];
716: s[2] = xb[i2 + 2];
717: s[3] = xb[i2 + 3];
718: s[4] = xb[i2 + 4];
719: s[5] = xb[i2 + 5];
720: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
721: x[i2] = xw[0];
722: x[i2 + 1] = xw[1];
723: x[i2 + 2] = xw[2];
724: x[i2 + 3] = xw[3];
725: x[i2 + 4] = xw[4];
726: x[i2 + 5] = xw[5];
727: i2 -= 6;
728: idiag -= 36;
729: for (i = m - 2; i >= 0; i--) {
730: v = aa + 36 * (diag[i] + 1);
731: vi = aj + diag[i] + 1;
732: nz = ai[i + 1] - diag[i] - 1;
733: s[0] = xb[i2];
734: s[1] = xb[i2 + 1];
735: s[2] = xb[i2 + 2];
736: s[3] = xb[i2 + 3];
737: s[4] = xb[i2 + 4];
738: s[5] = xb[i2 + 5];
739: while (nz--) {
740: idx = 6 * (*vi++);
741: xw[0] = x[idx];
742: xw[1] = x[1 + idx];
743: xw[2] = x[2 + idx];
744: xw[3] = x[3 + idx];
745: xw[4] = x[4 + idx];
746: xw[5] = x[5 + idx];
747: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
748: v += 36;
749: }
750: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
751: x[i2] = xw[0];
752: x[i2 + 1] = xw[1];
753: x[i2 + 2] = xw[2];
754: x[i2 + 3] = xw[3];
755: x[i2 + 4] = xw[4];
756: x[i2 + 5] = xw[5];
757: idiag -= 36;
758: i2 -= 6;
759: }
760: break;
761: case 7:
762: s[0] = xb[i2];
763: s[1] = xb[i2 + 1];
764: s[2] = xb[i2 + 2];
765: s[3] = xb[i2 + 3];
766: s[4] = xb[i2 + 4];
767: s[5] = xb[i2 + 5];
768: s[6] = xb[i2 + 6];
769: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
770: x[i2] = xw[0];
771: x[i2 + 1] = xw[1];
772: x[i2 + 2] = xw[2];
773: x[i2 + 3] = xw[3];
774: x[i2 + 4] = xw[4];
775: x[i2 + 5] = xw[5];
776: x[i2 + 6] = xw[6];
777: i2 -= 7;
778: idiag -= 49;
779: for (i = m - 2; i >= 0; i--) {
780: v = aa + 49 * (diag[i] + 1);
781: vi = aj + diag[i] + 1;
782: nz = ai[i + 1] - diag[i] - 1;
783: s[0] = xb[i2];
784: s[1] = xb[i2 + 1];
785: s[2] = xb[i2 + 2];
786: s[3] = xb[i2 + 3];
787: s[4] = xb[i2 + 4];
788: s[5] = xb[i2 + 5];
789: s[6] = xb[i2 + 6];
790: while (nz--) {
791: idx = 7 * (*vi++);
792: xw[0] = x[idx];
793: xw[1] = x[1 + idx];
794: xw[2] = x[2 + idx];
795: xw[3] = x[3 + idx];
796: xw[4] = x[4 + idx];
797: xw[5] = x[5 + idx];
798: xw[6] = x[6 + idx];
799: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
800: v += 49;
801: }
802: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
803: x[i2] = xw[0];
804: x[i2 + 1] = xw[1];
805: x[i2 + 2] = xw[2];
806: x[i2 + 3] = xw[3];
807: x[i2 + 4] = xw[4];
808: x[i2 + 5] = xw[5];
809: x[i2 + 6] = xw[6];
810: idiag -= 49;
811: i2 -= 7;
812: }
813: break;
814: default:
815: PetscCall(PetscArraycpy(w, xb + i2, bs));
816: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
817: i2 -= bs;
818: idiag -= bs2;
819: for (i = m - 2; i >= 0; i--) {
820: v = aa + bs2 * (diag[i] + 1);
821: vi = aj + diag[i] + 1;
822: nz = ai[i + 1] - diag[i] - 1;
824: PetscCall(PetscArraycpy(w, xb + i2, bs));
825: /* copy all rows of x that are needed into contiguous space */
826: workt = work;
827: for (j = 0; j < nz; j++) {
828: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
829: workt += bs;
830: }
831: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
832: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834: idiag -= bs2;
835: i2 -= bs;
836: }
837: break;
838: }
839: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
840: }
841: its--;
842: }
843: while (its--) {
844: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
845: idiag = a->idiag;
846: i2 = 0;
847: switch (bs) {
848: case 1:
849: for (i = 0; i < m; i++) {
850: v = aa + ai[i];
851: vi = aj + ai[i];
852: nz = ai[i + 1] - ai[i];
853: s[0] = b[i2];
854: for (j = 0; j < nz; j++) {
855: xw[0] = x[vi[j]];
856: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
857: }
858: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
859: x[i2] += xw[0];
860: idiag += 1;
861: i2 += 1;
862: }
863: break;
864: case 2:
865: for (i = 0; i < m; i++) {
866: v = aa + 4 * ai[i];
867: vi = aj + ai[i];
868: nz = ai[i + 1] - ai[i];
869: s[0] = b[i2];
870: s[1] = b[i2 + 1];
871: for (j = 0; j < nz; j++) {
872: idx = 2 * vi[j];
873: it = 4 * j;
874: xw[0] = x[idx];
875: xw[1] = x[1 + idx];
876: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
877: }
878: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
879: x[i2] += xw[0];
880: x[i2 + 1] += xw[1];
881: idiag += 4;
882: i2 += 2;
883: }
884: break;
885: case 3:
886: for (i = 0; i < m; i++) {
887: v = aa + 9 * ai[i];
888: vi = aj + ai[i];
889: nz = ai[i + 1] - ai[i];
890: s[0] = b[i2];
891: s[1] = b[i2 + 1];
892: s[2] = b[i2 + 2];
893: while (nz--) {
894: idx = 3 * (*vi++);
895: xw[0] = x[idx];
896: xw[1] = x[1 + idx];
897: xw[2] = x[2 + idx];
898: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
899: v += 9;
900: }
901: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
902: x[i2] += xw[0];
903: x[i2 + 1] += xw[1];
904: x[i2 + 2] += xw[2];
905: idiag += 9;
906: i2 += 3;
907: }
908: break;
909: case 4:
910: for (i = 0; i < m; i++) {
911: v = aa + 16 * ai[i];
912: vi = aj + ai[i];
913: nz = ai[i + 1] - ai[i];
914: s[0] = b[i2];
915: s[1] = b[i2 + 1];
916: s[2] = b[i2 + 2];
917: s[3] = b[i2 + 3];
918: while (nz--) {
919: idx = 4 * (*vi++);
920: xw[0] = x[idx];
921: xw[1] = x[1 + idx];
922: xw[2] = x[2 + idx];
923: xw[3] = x[3 + idx];
924: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
925: v += 16;
926: }
927: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
928: x[i2] += xw[0];
929: x[i2 + 1] += xw[1];
930: x[i2 + 2] += xw[2];
931: x[i2 + 3] += xw[3];
932: idiag += 16;
933: i2 += 4;
934: }
935: break;
936: case 5:
937: for (i = 0; i < m; i++) {
938: v = aa + 25 * ai[i];
939: vi = aj + ai[i];
940: nz = ai[i + 1] - ai[i];
941: s[0] = b[i2];
942: s[1] = b[i2 + 1];
943: s[2] = b[i2 + 2];
944: s[3] = b[i2 + 3];
945: s[4] = b[i2 + 4];
946: while (nz--) {
947: idx = 5 * (*vi++);
948: xw[0] = x[idx];
949: xw[1] = x[1 + idx];
950: xw[2] = x[2 + idx];
951: xw[3] = x[3 + idx];
952: xw[4] = x[4 + idx];
953: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
954: v += 25;
955: }
956: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
957: x[i2] += xw[0];
958: x[i2 + 1] += xw[1];
959: x[i2 + 2] += xw[2];
960: x[i2 + 3] += xw[3];
961: x[i2 + 4] += xw[4];
962: idiag += 25;
963: i2 += 5;
964: }
965: break;
966: case 6:
967: for (i = 0; i < m; i++) {
968: v = aa + 36 * ai[i];
969: vi = aj + ai[i];
970: nz = ai[i + 1] - ai[i];
971: s[0] = b[i2];
972: s[1] = b[i2 + 1];
973: s[2] = b[i2 + 2];
974: s[3] = b[i2 + 3];
975: s[4] = b[i2 + 4];
976: s[5] = b[i2 + 5];
977: while (nz--) {
978: idx = 6 * (*vi++);
979: xw[0] = x[idx];
980: xw[1] = x[1 + idx];
981: xw[2] = x[2 + idx];
982: xw[3] = x[3 + idx];
983: xw[4] = x[4 + idx];
984: xw[5] = x[5 + idx];
985: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
986: v += 36;
987: }
988: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
989: x[i2] += xw[0];
990: x[i2 + 1] += xw[1];
991: x[i2 + 2] += xw[2];
992: x[i2 + 3] += xw[3];
993: x[i2 + 4] += xw[4];
994: x[i2 + 5] += xw[5];
995: idiag += 36;
996: i2 += 6;
997: }
998: break;
999: case 7:
1000: for (i = 0; i < m; i++) {
1001: v = aa + 49 * ai[i];
1002: vi = aj + ai[i];
1003: nz = ai[i + 1] - ai[i];
1004: s[0] = b[i2];
1005: s[1] = b[i2 + 1];
1006: s[2] = b[i2 + 2];
1007: s[3] = b[i2 + 3];
1008: s[4] = b[i2 + 4];
1009: s[5] = b[i2 + 5];
1010: s[6] = b[i2 + 6];
1011: while (nz--) {
1012: idx = 7 * (*vi++);
1013: xw[0] = x[idx];
1014: xw[1] = x[1 + idx];
1015: xw[2] = x[2 + idx];
1016: xw[3] = x[3 + idx];
1017: xw[4] = x[4 + idx];
1018: xw[5] = x[5 + idx];
1019: xw[6] = x[6 + idx];
1020: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1021: v += 49;
1022: }
1023: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1024: x[i2] += xw[0];
1025: x[i2 + 1] += xw[1];
1026: x[i2 + 2] += xw[2];
1027: x[i2 + 3] += xw[3];
1028: x[i2 + 4] += xw[4];
1029: x[i2 + 5] += xw[5];
1030: x[i2 + 6] += xw[6];
1031: idiag += 49;
1032: i2 += 7;
1033: }
1034: break;
1035: default:
1036: for (i = 0; i < m; i++) {
1037: v = aa + bs2 * ai[i];
1038: vi = aj + ai[i];
1039: nz = ai[i + 1] - ai[i];
1041: PetscCall(PetscArraycpy(w, b + i2, bs));
1042: /* copy all rows of x that are needed into contiguous space */
1043: workt = work;
1044: for (j = 0; j < nz; j++) {
1045: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1046: workt += bs;
1047: }
1048: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1049: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051: idiag += bs2;
1052: i2 += bs;
1053: }
1054: break;
1055: }
1056: PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1057: }
1058: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1059: idiag = a->idiag + bs2 * (a->mbs - 1);
1060: i2 = bs * (m - 1);
1061: switch (bs) {
1062: case 1:
1063: for (i = m - 1; i >= 0; i--) {
1064: v = aa + ai[i];
1065: vi = aj + ai[i];
1066: nz = ai[i + 1] - ai[i];
1067: s[0] = b[i2];
1068: for (j = 0; j < nz; j++) {
1069: xw[0] = x[vi[j]];
1070: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1071: }
1072: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1073: x[i2] += xw[0];
1074: idiag -= 1;
1075: i2 -= 1;
1076: }
1077: break;
1078: case 2:
1079: for (i = m - 1; i >= 0; i--) {
1080: v = aa + 4 * ai[i];
1081: vi = aj + ai[i];
1082: nz = ai[i + 1] - ai[i];
1083: s[0] = b[i2];
1084: s[1] = b[i2 + 1];
1085: for (j = 0; j < nz; j++) {
1086: idx = 2 * vi[j];
1087: it = 4 * j;
1088: xw[0] = x[idx];
1089: xw[1] = x[1 + idx];
1090: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1091: }
1092: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1093: x[i2] += xw[0];
1094: x[i2 + 1] += xw[1];
1095: idiag -= 4;
1096: i2 -= 2;
1097: }
1098: break;
1099: case 3:
1100: for (i = m - 1; i >= 0; i--) {
1101: v = aa + 9 * ai[i];
1102: vi = aj + ai[i];
1103: nz = ai[i + 1] - ai[i];
1104: s[0] = b[i2];
1105: s[1] = b[i2 + 1];
1106: s[2] = b[i2 + 2];
1107: while (nz--) {
1108: idx = 3 * (*vi++);
1109: xw[0] = x[idx];
1110: xw[1] = x[1 + idx];
1111: xw[2] = x[2 + idx];
1112: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1113: v += 9;
1114: }
1115: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1116: x[i2] += xw[0];
1117: x[i2 + 1] += xw[1];
1118: x[i2 + 2] += xw[2];
1119: idiag -= 9;
1120: i2 -= 3;
1121: }
1122: break;
1123: case 4:
1124: for (i = m - 1; i >= 0; i--) {
1125: v = aa + 16 * ai[i];
1126: vi = aj + ai[i];
1127: nz = ai[i + 1] - ai[i];
1128: s[0] = b[i2];
1129: s[1] = b[i2 + 1];
1130: s[2] = b[i2 + 2];
1131: s[3] = b[i2 + 3];
1132: while (nz--) {
1133: idx = 4 * (*vi++);
1134: xw[0] = x[idx];
1135: xw[1] = x[1 + idx];
1136: xw[2] = x[2 + idx];
1137: xw[3] = x[3 + idx];
1138: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1139: v += 16;
1140: }
1141: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1142: x[i2] += xw[0];
1143: x[i2 + 1] += xw[1];
1144: x[i2 + 2] += xw[2];
1145: x[i2 + 3] += xw[3];
1146: idiag -= 16;
1147: i2 -= 4;
1148: }
1149: break;
1150: case 5:
1151: for (i = m - 1; i >= 0; i--) {
1152: v = aa + 25 * ai[i];
1153: vi = aj + ai[i];
1154: nz = ai[i + 1] - ai[i];
1155: s[0] = b[i2];
1156: s[1] = b[i2 + 1];
1157: s[2] = b[i2 + 2];
1158: s[3] = b[i2 + 3];
1159: s[4] = b[i2 + 4];
1160: while (nz--) {
1161: idx = 5 * (*vi++);
1162: xw[0] = x[idx];
1163: xw[1] = x[1 + idx];
1164: xw[2] = x[2 + idx];
1165: xw[3] = x[3 + idx];
1166: xw[4] = x[4 + idx];
1167: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1168: v += 25;
1169: }
1170: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1171: x[i2] += xw[0];
1172: x[i2 + 1] += xw[1];
1173: x[i2 + 2] += xw[2];
1174: x[i2 + 3] += xw[3];
1175: x[i2 + 4] += xw[4];
1176: idiag -= 25;
1177: i2 -= 5;
1178: }
1179: break;
1180: case 6:
1181: for (i = m - 1; i >= 0; i--) {
1182: v = aa + 36 * ai[i];
1183: vi = aj + ai[i];
1184: nz = ai[i + 1] - ai[i];
1185: s[0] = b[i2];
1186: s[1] = b[i2 + 1];
1187: s[2] = b[i2 + 2];
1188: s[3] = b[i2 + 3];
1189: s[4] = b[i2 + 4];
1190: s[5] = b[i2 + 5];
1191: while (nz--) {
1192: idx = 6 * (*vi++);
1193: xw[0] = x[idx];
1194: xw[1] = x[1 + idx];
1195: xw[2] = x[2 + idx];
1196: xw[3] = x[3 + idx];
1197: xw[4] = x[4 + idx];
1198: xw[5] = x[5 + idx];
1199: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1200: v += 36;
1201: }
1202: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1203: x[i2] += xw[0];
1204: x[i2 + 1] += xw[1];
1205: x[i2 + 2] += xw[2];
1206: x[i2 + 3] += xw[3];
1207: x[i2 + 4] += xw[4];
1208: x[i2 + 5] += xw[5];
1209: idiag -= 36;
1210: i2 -= 6;
1211: }
1212: break;
1213: case 7:
1214: for (i = m - 1; i >= 0; i--) {
1215: v = aa + 49 * ai[i];
1216: vi = aj + ai[i];
1217: nz = ai[i + 1] - ai[i];
1218: s[0] = b[i2];
1219: s[1] = b[i2 + 1];
1220: s[2] = b[i2 + 2];
1221: s[3] = b[i2 + 3];
1222: s[4] = b[i2 + 4];
1223: s[5] = b[i2 + 5];
1224: s[6] = b[i2 + 6];
1225: while (nz--) {
1226: idx = 7 * (*vi++);
1227: xw[0] = x[idx];
1228: xw[1] = x[1 + idx];
1229: xw[2] = x[2 + idx];
1230: xw[3] = x[3 + idx];
1231: xw[4] = x[4 + idx];
1232: xw[5] = x[5 + idx];
1233: xw[6] = x[6 + idx];
1234: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1235: v += 49;
1236: }
1237: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1238: x[i2] += xw[0];
1239: x[i2 + 1] += xw[1];
1240: x[i2 + 2] += xw[2];
1241: x[i2 + 3] += xw[3];
1242: x[i2 + 4] += xw[4];
1243: x[i2 + 5] += xw[5];
1244: x[i2 + 6] += xw[6];
1245: idiag -= 49;
1246: i2 -= 7;
1247: }
1248: break;
1249: default:
1250: for (i = m - 1; i >= 0; i--) {
1251: v = aa + bs2 * ai[i];
1252: vi = aj + ai[i];
1253: nz = ai[i + 1] - ai[i];
1255: PetscCall(PetscArraycpy(w, b + i2, bs));
1256: /* copy all rows of x that are needed into contiguous space */
1257: workt = work;
1258: for (j = 0; j < nz; j++) {
1259: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1260: workt += bs;
1261: }
1262: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1263: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265: idiag -= bs2;
1266: i2 -= bs;
1267: }
1268: break;
1269: }
1270: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1271: }
1272: }
1273: PetscCall(VecRestoreArray(xx, &x));
1274: PetscCall(VecRestoreArrayRead(bb, &b));
1275: PetscFunctionReturn(PETSC_SUCCESS);
1276: }
1278: /*
1279: Special version for direct calls from Fortran (Used in PETSc-fun3d)
1280: */
1281: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1282: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1283: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1284: #define matsetvaluesblocked4_ matsetvaluesblocked4
1285: #endif
1287: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1288: {
1289: Mat A = *AA;
1290: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1291: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1292: PetscInt *ai = a->i, *ailen = a->ilen;
1293: PetscInt *aj = a->j, stepval, lastcol = -1;
1294: const PetscScalar *value = v;
1295: MatScalar *ap, *aa = a->a, *bap;
1297: PetscFunctionBegin;
1298: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1299: stepval = (n - 1) * 4;
1300: for (k = 0; k < m; k++) { /* loop over added rows */
1301: row = im[k];
1302: rp = aj + ai[row];
1303: ap = aa + 16 * ai[row];
1304: nrow = ailen[row];
1305: low = 0;
1306: high = nrow;
1307: for (l = 0; l < n; l++) { /* loop over added columns */
1308: col = in[l];
1309: if (col <= lastcol) low = 0;
1310: else high = nrow;
1311: lastcol = col;
1312: value = v + k * (stepval + 4 + l) * 4;
1313: while (high - low > 7) {
1314: t = (low + high) / 2;
1315: if (rp[t] > col) high = t;
1316: else low = t;
1317: }
1318: for (i = low; i < high; i++) {
1319: if (rp[i] > col) break;
1320: if (rp[i] == col) {
1321: bap = ap + 16 * i;
1322: for (ii = 0; ii < 4; ii++, value += stepval) {
1323: for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1324: }
1325: goto noinsert2;
1326: }
1327: }
1328: N = nrow++ - 1;
1329: high++; /* added new column index thus must search to one higher than before */
1330: /* shift up all the later entries in this row */
1331: for (ii = N; ii >= i; ii--) {
1332: rp[ii + 1] = rp[ii];
1333: PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1334: }
1335: if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1336: rp[i] = col;
1337: bap = ap + 16 * i;
1338: for (ii = 0; ii < 4; ii++, value += stepval) {
1339: for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1340: }
1341: noinsert2:;
1342: low = i;
1343: }
1344: ailen[row] = nrow;
1345: }
1346: PetscFunctionReturnVoid();
1347: }
1349: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1350: #define matsetvalues4_ MATSETVALUES4
1351: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1352: #define matsetvalues4_ matsetvalues4
1353: #endif
1355: PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1356: {
1357: Mat A = *AA;
1358: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1359: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1360: PetscInt *ai = a->i, *ailen = a->ilen;
1361: PetscInt *aj = a->j, brow, bcol;
1362: PetscInt ridx, cidx, lastcol = -1;
1363: MatScalar *ap, value, *aa = a->a, *bap;
1365: PetscFunctionBegin;
1366: for (k = 0; k < m; k++) { /* loop over added rows */
1367: row = im[k];
1368: brow = row / 4;
1369: rp = aj + ai[brow];
1370: ap = aa + 16 * ai[brow];
1371: nrow = ailen[brow];
1372: low = 0;
1373: high = nrow;
1374: for (l = 0; l < n; l++) { /* loop over added columns */
1375: col = in[l];
1376: bcol = col / 4;
1377: ridx = row % 4;
1378: cidx = col % 4;
1379: value = v[l + k * n];
1380: if (col <= lastcol) low = 0;
1381: else high = nrow;
1382: lastcol = col;
1383: while (high - low > 7) {
1384: t = (low + high) / 2;
1385: if (rp[t] > bcol) high = t;
1386: else low = t;
1387: }
1388: for (i = low; i < high; i++) {
1389: if (rp[i] > bcol) break;
1390: if (rp[i] == bcol) {
1391: bap = ap + 16 * i + 4 * cidx + ridx;
1392: *bap += value;
1393: goto noinsert1;
1394: }
1395: }
1396: N = nrow++ - 1;
1397: high++; /* added new column thus must search to one higher than before */
1398: /* shift up all the later entries in this row */
1399: PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1400: PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1401: PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1402: rp[i] = bcol;
1403: ap[16 * i + 4 * cidx + ridx] = value;
1404: noinsert1:;
1405: low = i;
1406: }
1407: ailen[brow] = nrow;
1408: }
1409: PetscFunctionReturnVoid();
1410: }
1412: /*
1413: Checks for missing diagonals
1414: */
1415: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1416: {
1417: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1418: PetscInt *diag, *ii = a->i, i;
1420: PetscFunctionBegin;
1421: PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1422: *missing = PETSC_FALSE;
1423: if (A->rmap->n > 0 && !ii) {
1424: *missing = PETSC_TRUE;
1425: if (d) *d = 0;
1426: PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1427: } else {
1428: PetscInt n;
1429: n = PetscMin(a->mbs, a->nbs);
1430: diag = a->diag;
1431: for (i = 0; i < n; i++) {
1432: if (diag[i] >= ii[i + 1]) {
1433: *missing = PETSC_TRUE;
1434: if (d) *d = i;
1435: PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1436: break;
1437: }
1438: }
1439: }
1440: PetscFunctionReturn(PETSC_SUCCESS);
1441: }
1443: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1444: {
1445: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1446: PetscInt i, j, m = a->mbs;
1448: PetscFunctionBegin;
1449: if (!a->diag) {
1450: PetscCall(PetscMalloc1(m, &a->diag));
1451: a->free_diag = PETSC_TRUE;
1452: }
1453: for (i = 0; i < m; i++) {
1454: a->diag[i] = a->i[i + 1];
1455: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1456: if (a->j[j] == i) {
1457: a->diag[i] = j;
1458: break;
1459: }
1460: }
1461: }
1462: PetscFunctionReturn(PETSC_SUCCESS);
1463: }
1465: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1466: {
1467: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1468: PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1469: PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1471: PetscFunctionBegin;
1472: *nn = n;
1473: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1474: if (symmetric) {
1475: PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1476: nz = tia[n];
1477: } else {
1478: tia = a->i;
1479: tja = a->j;
1480: }
1482: if (!blockcompressed && bs > 1) {
1483: (*nn) *= bs;
1484: /* malloc & create the natural set of indices */
1485: PetscCall(PetscMalloc1((n + 1) * bs, ia));
1486: if (n) {
1487: (*ia)[0] = oshift;
1488: for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1489: }
1491: for (i = 1; i < n; i++) {
1492: (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1493: for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1494: }
1495: if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497: if (inja) {
1498: PetscCall(PetscMalloc1(nz * bs * bs, ja));
1499: cnt = 0;
1500: for (i = 0; i < n; i++) {
1501: for (j = 0; j < bs; j++) {
1502: for (k = tia[i]; k < tia[i + 1]; k++) {
1503: for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1504: }
1505: }
1506: }
1507: }
1509: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1510: PetscCall(PetscFree(tia));
1511: PetscCall(PetscFree(tja));
1512: }
1513: } else if (oshift == 1) {
1514: if (symmetric) {
1515: nz = tia[A->rmap->n / bs];
1516: /* add 1 to i and j indices */
1517: for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1518: *ia = tia;
1519: if (ja) {
1520: for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1521: *ja = tja;
1522: }
1523: } else {
1524: nz = a->i[A->rmap->n / bs];
1525: /* malloc space and add 1 to i and j indices */
1526: PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1527: for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1528: if (ja) {
1529: PetscCall(PetscMalloc1(nz, ja));
1530: for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1531: }
1532: }
1533: } else {
1534: *ia = tia;
1535: if (ja) *ja = tja;
1536: }
1537: PetscFunctionReturn(PETSC_SUCCESS);
1538: }
1540: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1541: {
1542: PetscFunctionBegin;
1543: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1544: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1545: PetscCall(PetscFree(*ia));
1546: if (ja) PetscCall(PetscFree(*ja));
1547: }
1548: PetscFunctionReturn(PETSC_SUCCESS);
1549: }
1551: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1552: {
1553: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1555: PetscFunctionBegin;
1556: if (A->hash_active) {
1557: PetscInt bs;
1558: A->ops[0] = a->cops;
1559: PetscCall(PetscHMapIJVDestroy(&a->ht));
1560: PetscCall(MatGetBlockSize(A, &bs));
1561: if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1562: PetscCall(PetscFree(a->dnz));
1563: PetscCall(PetscFree(a->bdnz));
1564: A->hash_active = PETSC_FALSE;
1565: }
1566: PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1567: PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1568: PetscCall(ISDestroy(&a->row));
1569: PetscCall(ISDestroy(&a->col));
1570: if (a->free_diag) PetscCall(PetscFree(a->diag));
1571: PetscCall(PetscFree(a->idiag));
1572: if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1573: PetscCall(PetscFree(a->solve_work));
1574: PetscCall(PetscFree(a->mult_work));
1575: PetscCall(PetscFree(a->sor_workt));
1576: PetscCall(PetscFree(a->sor_work));
1577: PetscCall(ISDestroy(&a->icol));
1578: PetscCall(PetscFree(a->saved_values));
1579: PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1581: PetscCall(MatDestroy(&a->sbaijMat));
1582: PetscCall(MatDestroy(&a->parent));
1583: PetscCall(PetscFree(A->data));
1585: PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1586: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1587: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1588: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1589: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1590: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1591: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1592: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1593: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1594: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1595: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1596: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1597: #if defined(PETSC_HAVE_HYPRE)
1598: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1599: #endif
1600: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1601: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1602: PetscFunctionReturn(PETSC_SUCCESS);
1603: }
1605: static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1606: {
1607: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1609: PetscFunctionBegin;
1610: switch (op) {
1611: case MAT_ROW_ORIENTED:
1612: a->roworiented = flg;
1613: break;
1614: case MAT_KEEP_NONZERO_PATTERN:
1615: a->keepnonzeropattern = flg;
1616: break;
1617: case MAT_NEW_NONZERO_LOCATIONS:
1618: a->nonew = (flg ? 0 : 1);
1619: break;
1620: case MAT_NEW_NONZERO_LOCATION_ERR:
1621: a->nonew = (flg ? -1 : 0);
1622: break;
1623: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1624: a->nonew = (flg ? -2 : 0);
1625: break;
1626: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1627: a->nounused = (flg ? -1 : 0);
1628: break;
1629: case MAT_FORCE_DIAGONAL_ENTRIES:
1630: case MAT_IGNORE_OFF_PROC_ENTRIES:
1631: case MAT_USE_HASH_TABLE:
1632: case MAT_SORTED_FULL:
1633: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1634: break;
1635: case MAT_SPD:
1636: case MAT_SYMMETRIC:
1637: case MAT_STRUCTURALLY_SYMMETRIC:
1638: case MAT_HERMITIAN:
1639: case MAT_SYMMETRY_ETERNAL:
1640: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1641: case MAT_SUBMAT_SINGLEIS:
1642: case MAT_STRUCTURE_ONLY:
1643: case MAT_SPD_ETERNAL:
1644: /* if the diagonal matrix is square it inherits some of the properties above */
1645: break;
1646: default:
1647: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1648: }
1649: PetscFunctionReturn(PETSC_SUCCESS);
1650: }
1652: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1653: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1654: {
1655: PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1656: MatScalar *aa_i;
1657: PetscScalar *v_i;
1659: PetscFunctionBegin;
1660: bs = A->rmap->bs;
1661: bs2 = bs * bs;
1662: PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1664: bn = row / bs; /* Block number */
1665: bp = row % bs; /* Block Position */
1666: M = ai[bn + 1] - ai[bn];
1667: *nz = bs * M;
1669: if (v) {
1670: *v = NULL;
1671: if (*nz) {
1672: PetscCall(PetscMalloc1(*nz, v));
1673: for (i = 0; i < M; i++) { /* for each block in the block row */
1674: v_i = *v + i * bs;
1675: aa_i = aa + bs2 * (ai[bn] + i);
1676: for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1677: }
1678: }
1679: }
1681: if (idx) {
1682: *idx = NULL;
1683: if (*nz) {
1684: PetscCall(PetscMalloc1(*nz, idx));
1685: for (i = 0; i < M; i++) { /* for each block in the block row */
1686: idx_i = *idx + i * bs;
1687: itmp = bs * aj[ai[bn] + i];
1688: for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1689: }
1690: }
1691: }
1692: PetscFunctionReturn(PETSC_SUCCESS);
1693: }
1695: PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1696: {
1697: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1699: PetscFunctionBegin;
1700: PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1701: PetscFunctionReturn(PETSC_SUCCESS);
1702: }
1704: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1705: {
1706: PetscFunctionBegin;
1707: if (idx) PetscCall(PetscFree(*idx));
1708: if (v) PetscCall(PetscFree(*v));
1709: PetscFunctionReturn(PETSC_SUCCESS);
1710: }
1712: static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1713: {
1714: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1715: Mat C;
1716: PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1717: PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr;
1718: MatScalar *ata, *aa = a->a;
1720: PetscFunctionBegin;
1721: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1722: PetscCall(PetscCalloc1(1 + nbs, &atfill));
1723: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1724: for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1726: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1727: PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1728: PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1729: PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1731: at = (Mat_SeqBAIJ *)C->data;
1732: ati = at->i;
1733: for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1734: } else {
1735: C = *B;
1736: at = (Mat_SeqBAIJ *)C->data;
1737: ati = at->i;
1738: }
1740: atj = at->j;
1741: ata = at->a;
1743: /* Copy ati into atfill so we have locations of the next free space in atj */
1744: PetscCall(PetscArraycpy(atfill, ati, nbs));
1746: /* Walk through A row-wise and mark nonzero entries of A^T. */
1747: for (i = 0; i < mbs; i++) {
1748: anzj = ai[i + 1] - ai[i];
1749: for (j = 0; j < anzj; j++) {
1750: atj[atfill[*aj]] = i;
1751: for (kr = 0; kr < bs; kr++) {
1752: for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1753: }
1754: atfill[*aj++] += 1;
1755: }
1756: }
1757: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1758: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1760: /* Clean up temporary space and complete requests. */
1761: PetscCall(PetscFree(atfill));
1763: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1764: PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1765: *B = C;
1766: } else {
1767: PetscCall(MatHeaderMerge(A, &C));
1768: }
1769: PetscFunctionReturn(PETSC_SUCCESS);
1770: }
1772: static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1773: {
1774: Mat Btrans;
1776: PetscFunctionBegin;
1777: *f = PETSC_FALSE;
1778: PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1779: PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1780: PetscCall(MatDestroy(&Btrans));
1781: PetscFunctionReturn(PETSC_SUCCESS);
1782: }
1784: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1785: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1786: {
1787: Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1788: PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1789: PetscInt *rowlens, *colidxs;
1790: PetscScalar *matvals;
1792: PetscFunctionBegin;
1793: PetscCall(PetscViewerSetUp(viewer));
1795: M = mat->rmap->N;
1796: N = mat->cmap->N;
1797: m = mat->rmap->n;
1798: bs = mat->rmap->bs;
1799: nz = bs * bs * A->nz;
1801: /* write matrix header */
1802: header[0] = MAT_FILE_CLASSID;
1803: header[1] = M;
1804: header[2] = N;
1805: header[3] = nz;
1806: PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1808: /* store row lengths */
1809: PetscCall(PetscMalloc1(m, &rowlens));
1810: for (cnt = 0, i = 0; i < A->mbs; i++)
1811: for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1812: PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1813: PetscCall(PetscFree(rowlens));
1815: /* store column indices */
1816: PetscCall(PetscMalloc1(nz, &colidxs));
1817: for (cnt = 0, i = 0; i < A->mbs; i++)
1818: for (k = 0; k < bs; k++)
1819: for (j = A->i[i]; j < A->i[i + 1]; j++)
1820: for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1821: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1822: PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1823: PetscCall(PetscFree(colidxs));
1825: /* store nonzero values */
1826: PetscCall(PetscMalloc1(nz, &matvals));
1827: for (cnt = 0, i = 0; i < A->mbs; i++)
1828: for (k = 0; k < bs; k++)
1829: for (j = A->i[i]; j < A->i[i + 1]; j++)
1830: for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1831: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1832: PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1833: PetscCall(PetscFree(matvals));
1835: /* write block size option to the viewer's .info file */
1836: PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1837: PetscFunctionReturn(PETSC_SUCCESS);
1838: }
1840: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1841: {
1842: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1843: PetscInt i, bs = A->rmap->bs, k;
1845: PetscFunctionBegin;
1846: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1847: for (i = 0; i < a->mbs; i++) {
1848: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1849: for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1850: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1851: }
1852: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1853: PetscFunctionReturn(PETSC_SUCCESS);
1854: }
1856: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1857: {
1858: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1859: PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1860: PetscViewerFormat format;
1862: PetscFunctionBegin;
1863: if (A->structure_only) {
1864: PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1865: PetscFunctionReturn(PETSC_SUCCESS);
1866: }
1868: PetscCall(PetscViewerGetFormat(viewer, &format));
1869: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1870: PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
1871: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1872: const char *matname;
1873: Mat aij;
1874: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1875: PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1876: PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1877: PetscCall(MatView(aij, viewer));
1878: PetscCall(MatDestroy(&aij));
1879: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1880: PetscFunctionReturn(PETSC_SUCCESS);
1881: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1882: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1883: for (i = 0; i < a->mbs; i++) {
1884: for (j = 0; j < bs; j++) {
1885: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1886: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1887: for (l = 0; l < bs; l++) {
1888: #if defined(PETSC_USE_COMPLEX)
1889: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1890: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1891: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1892: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1893: } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1894: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1895: }
1896: #else
1897: if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1898: #endif
1899: }
1900: }
1901: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1902: }
1903: }
1904: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1905: } else {
1906: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1907: for (i = 0; i < a->mbs; i++) {
1908: for (j = 0; j < bs; j++) {
1909: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1910: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1911: for (l = 0; l < bs; l++) {
1912: #if defined(PETSC_USE_COMPLEX)
1913: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1914: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1915: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1916: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1917: } else {
1918: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1919: }
1920: #else
1921: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1922: #endif
1923: }
1924: }
1925: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1926: }
1927: }
1928: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1929: }
1930: PetscCall(PetscViewerFlush(viewer));
1931: PetscFunctionReturn(PETSC_SUCCESS);
1932: }
1934: #include <petscdraw.h>
1935: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1936: {
1937: Mat A = (Mat)Aa;
1938: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1939: PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1940: PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1941: MatScalar *aa;
1942: PetscViewer viewer;
1943: PetscViewerFormat format;
1944: int color;
1946: PetscFunctionBegin;
1947: PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1948: PetscCall(PetscViewerGetFormat(viewer, &format));
1949: PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1951: /* loop over matrix elements drawing boxes */
1953: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1954: PetscDrawCollectiveBegin(draw);
1955: /* Blue for negative, Cyan for zero and Red for positive */
1956: color = PETSC_DRAW_BLUE;
1957: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1958: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1959: y_l = A->rmap->N - row - 1.0;
1960: y_r = y_l + 1.0;
1961: x_l = a->j[j] * bs;
1962: x_r = x_l + 1.0;
1963: aa = a->a + j * bs2;
1964: for (k = 0; k < bs; k++) {
1965: for (l = 0; l < bs; l++) {
1966: if (PetscRealPart(*aa++) >= 0.) continue;
1967: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1968: }
1969: }
1970: }
1971: }
1972: color = PETSC_DRAW_CYAN;
1973: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1974: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1975: y_l = A->rmap->N - row - 1.0;
1976: y_r = y_l + 1.0;
1977: x_l = a->j[j] * bs;
1978: x_r = x_l + 1.0;
1979: aa = a->a + j * bs2;
1980: for (k = 0; k < bs; k++) {
1981: for (l = 0; l < bs; l++) {
1982: if (PetscRealPart(*aa++) != 0.) continue;
1983: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1984: }
1985: }
1986: }
1987: }
1988: color = PETSC_DRAW_RED;
1989: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1990: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1991: y_l = A->rmap->N - row - 1.0;
1992: y_r = y_l + 1.0;
1993: x_l = a->j[j] * bs;
1994: x_r = x_l + 1.0;
1995: aa = a->a + j * bs2;
1996: for (k = 0; k < bs; k++) {
1997: for (l = 0; l < bs; l++) {
1998: if (PetscRealPart(*aa++) <= 0.) continue;
1999: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2000: }
2001: }
2002: }
2003: }
2004: PetscDrawCollectiveEnd(draw);
2005: } else {
2006: /* use contour shading to indicate magnitude of values */
2007: /* first determine max of all nonzero values */
2008: PetscReal minv = 0.0, maxv = 0.0;
2009: PetscDraw popup;
2011: for (i = 0; i < a->nz * a->bs2; i++) {
2012: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2013: }
2014: if (minv >= maxv) maxv = minv + PETSC_SMALL;
2015: PetscCall(PetscDrawGetPopup(draw, &popup));
2016: PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2018: PetscDrawCollectiveBegin(draw);
2019: for (i = 0, row = 0; i < mbs; i++, row += bs) {
2020: for (j = a->i[i]; j < a->i[i + 1]; j++) {
2021: y_l = A->rmap->N - row - 1.0;
2022: y_r = y_l + 1.0;
2023: x_l = a->j[j] * bs;
2024: x_r = x_l + 1.0;
2025: aa = a->a + j * bs2;
2026: for (k = 0; k < bs; k++) {
2027: for (l = 0; l < bs; l++) {
2028: MatScalar v = *aa++;
2029: color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2030: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2031: }
2032: }
2033: }
2034: }
2035: PetscDrawCollectiveEnd(draw);
2036: }
2037: PetscFunctionReturn(PETSC_SUCCESS);
2038: }
2040: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2041: {
2042: PetscReal xl, yl, xr, yr, w, h;
2043: PetscDraw draw;
2044: PetscBool isnull;
2046: PetscFunctionBegin;
2047: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2048: PetscCall(PetscDrawIsNull(draw, &isnull));
2049: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2051: xr = A->cmap->n;
2052: yr = A->rmap->N;
2053: h = yr / 10.0;
2054: w = xr / 10.0;
2055: xr += w;
2056: yr += h;
2057: xl = -w;
2058: yl = -h;
2059: PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2060: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2061: PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2062: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2063: PetscCall(PetscDrawSave(draw));
2064: PetscFunctionReturn(PETSC_SUCCESS);
2065: }
2067: PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2068: {
2069: PetscBool iascii, isbinary, isdraw;
2071: PetscFunctionBegin;
2072: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2073: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2074: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2075: if (iascii) {
2076: PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2077: } else if (isbinary) {
2078: PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2079: } else if (isdraw) {
2080: PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2081: } else {
2082: Mat B;
2083: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2084: PetscCall(MatView(B, viewer));
2085: PetscCall(MatDestroy(&B));
2086: }
2087: PetscFunctionReturn(PETSC_SUCCESS);
2088: }
2090: PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2091: {
2092: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2093: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2094: PetscInt *ai = a->i, *ailen = a->ilen;
2095: PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2096: MatScalar *ap, *aa = a->a;
2098: PetscFunctionBegin;
2099: for (k = 0; k < m; k++) { /* loop over rows */
2100: row = im[k];
2101: brow = row / bs;
2102: if (row < 0) {
2103: v += n;
2104: continue;
2105: } /* negative row */
2106: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2107: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2108: ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2109: nrow = ailen[brow];
2110: for (l = 0; l < n; l++) { /* loop over columns */
2111: if (in[l] < 0) {
2112: v++;
2113: continue;
2114: } /* negative column */
2115: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2116: col = in[l];
2117: bcol = col / bs;
2118: cidx = col % bs;
2119: ridx = row % bs;
2120: high = nrow;
2121: low = 0; /* assume unsorted */
2122: while (high - low > 5) {
2123: t = (low + high) / 2;
2124: if (rp[t] > bcol) high = t;
2125: else low = t;
2126: }
2127: for (i = low; i < high; i++) {
2128: if (rp[i] > bcol) break;
2129: if (rp[i] == bcol) {
2130: *v++ = ap[bs2 * i + bs * cidx + ridx];
2131: goto finished;
2132: }
2133: }
2134: *v++ = 0.0;
2135: finished:;
2136: }
2137: }
2138: PetscFunctionReturn(PETSC_SUCCESS);
2139: }
2141: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2142: {
2143: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2144: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2145: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2146: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2147: PetscBool roworiented = a->roworiented;
2148: const PetscScalar *value = v;
2149: MatScalar *ap = NULL, *aa = a->a, *bap;
2151: PetscFunctionBegin;
2152: if (roworiented) {
2153: stepval = (n - 1) * bs;
2154: } else {
2155: stepval = (m - 1) * bs;
2156: }
2157: for (k = 0; k < m; k++) { /* loop over added rows */
2158: row = im[k];
2159: if (row < 0) continue;
2160: PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2161: rp = aj + ai[row];
2162: if (!A->structure_only) ap = aa + bs2 * ai[row];
2163: rmax = imax[row];
2164: nrow = ailen[row];
2165: low = 0;
2166: high = nrow;
2167: for (l = 0; l < n; l++) { /* loop over added columns */
2168: if (in[l] < 0) continue;
2169: PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2170: col = in[l];
2171: if (!A->structure_only) {
2172: if (roworiented) {
2173: value = v + (k * (stepval + bs) + l) * bs;
2174: } else {
2175: value = v + (l * (stepval + bs) + k) * bs;
2176: }
2177: }
2178: if (col <= lastcol) low = 0;
2179: else high = nrow;
2180: lastcol = col;
2181: while (high - low > 7) {
2182: t = (low + high) / 2;
2183: if (rp[t] > col) high = t;
2184: else low = t;
2185: }
2186: for (i = low; i < high; i++) {
2187: if (rp[i] > col) break;
2188: if (rp[i] == col) {
2189: if (A->structure_only) goto noinsert2;
2190: bap = ap + bs2 * i;
2191: if (roworiented) {
2192: if (is == ADD_VALUES) {
2193: for (ii = 0; ii < bs; ii++, value += stepval) {
2194: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2195: }
2196: } else {
2197: for (ii = 0; ii < bs; ii++, value += stepval) {
2198: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199: }
2200: }
2201: } else {
2202: if (is == ADD_VALUES) {
2203: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2204: for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2205: bap += bs;
2206: }
2207: } else {
2208: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2209: for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2210: bap += bs;
2211: }
2212: }
2213: }
2214: goto noinsert2;
2215: }
2216: }
2217: if (nonew == 1) goto noinsert2;
2218: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2219: if (A->structure_only) {
2220: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2221: } else {
2222: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2223: }
2224: N = nrow++ - 1;
2225: high++;
2226: /* shift up all the later entries in this row */
2227: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2228: rp[i] = col;
2229: if (!A->structure_only) {
2230: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2231: bap = ap + bs2 * i;
2232: if (roworiented) {
2233: for (ii = 0; ii < bs; ii++, value += stepval) {
2234: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2235: }
2236: } else {
2237: for (ii = 0; ii < bs; ii++, value += stepval) {
2238: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2239: }
2240: }
2241: }
2242: noinsert2:;
2243: low = i;
2244: }
2245: ailen[row] = nrow;
2246: }
2247: PetscFunctionReturn(PETSC_SUCCESS);
2248: }
2250: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2251: {
2252: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2253: PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2254: PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen;
2255: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2256: MatScalar *aa = a->a, *ap;
2257: PetscReal ratio = 0.6;
2259: PetscFunctionBegin;
2260: if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2262: if (m) rmax = ailen[0];
2263: for (i = 1; i < mbs; i++) {
2264: /* move each row back by the amount of empty slots (fshift) before it*/
2265: fshift += imax[i - 1] - ailen[i - 1];
2266: rmax = PetscMax(rmax, ailen[i]);
2267: if (fshift) {
2268: ip = aj + ai[i];
2269: ap = aa + bs2 * ai[i];
2270: N = ailen[i];
2271: PetscCall(PetscArraymove(ip - fshift, ip, N));
2272: if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2273: }
2274: ai[i] = ai[i - 1] + ailen[i - 1];
2275: }
2276: if (mbs) {
2277: fshift += imax[mbs - 1] - ailen[mbs - 1];
2278: ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2279: }
2281: /* reset ilen and imax for each row */
2282: a->nonzerorowcnt = 0;
2283: if (A->structure_only) {
2284: PetscCall(PetscFree2(a->imax, a->ilen));
2285: } else { /* !A->structure_only */
2286: for (i = 0; i < mbs; i++) {
2287: ailen[i] = imax[i] = ai[i + 1] - ai[i];
2288: a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2289: }
2290: }
2291: a->nz = ai[mbs];
2293: /* diagonals may have moved, so kill the diagonal pointers */
2294: a->idiagvalid = PETSC_FALSE;
2295: if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2296: if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2297: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2298: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2299: PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2301: A->info.mallocs += a->reallocs;
2302: a->reallocs = 0;
2303: A->info.nz_unneeded = (PetscReal)fshift * bs2;
2304: a->rmax = rmax;
2306: if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2307: PetscFunctionReturn(PETSC_SUCCESS);
2308: }
2310: /*
2311: This function returns an array of flags which indicate the locations of contiguous
2312: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2313: then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2314: Assume: sizes should be long enough to hold all the values.
2315: */
2316: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2317: {
2318: PetscInt j = 0;
2320: PetscFunctionBegin;
2321: for (PetscInt i = 0; i < n; j++) {
2322: PetscInt row = idx[i];
2323: if (row % bs != 0) { /* Not the beginning of a block */
2324: sizes[j] = 1;
2325: i++;
2326: } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2327: sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
2328: i++;
2329: } else { /* Beginning of the block, so check if the complete block exists */
2330: PetscBool flg = PETSC_TRUE;
2331: for (PetscInt k = 1; k < bs; k++) {
2332: if (row + k != idx[i + k]) { /* break in the block */
2333: flg = PETSC_FALSE;
2334: break;
2335: }
2336: }
2337: if (flg) { /* No break in the bs */
2338: sizes[j] = bs;
2339: i += bs;
2340: } else {
2341: sizes[j] = 1;
2342: i++;
2343: }
2344: }
2345: }
2346: *bs_max = j;
2347: PetscFunctionReturn(PETSC_SUCCESS);
2348: }
2350: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2351: {
2352: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2353: PetscInt i, j, k, count, *rows;
2354: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2355: PetscScalar zero = 0.0;
2356: MatScalar *aa;
2357: const PetscScalar *xx;
2358: PetscScalar *bb;
2360: PetscFunctionBegin;
2361: /* fix right-hand side if needed */
2362: if (x && b) {
2363: PetscCall(VecGetArrayRead(x, &xx));
2364: PetscCall(VecGetArray(b, &bb));
2365: for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2366: PetscCall(VecRestoreArrayRead(x, &xx));
2367: PetscCall(VecRestoreArray(b, &bb));
2368: }
2370: /* Make a copy of the IS and sort it */
2371: /* allocate memory for rows,sizes */
2372: PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2374: /* copy IS values to rows, and sort them */
2375: for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2376: PetscCall(PetscSortInt(is_n, rows));
2378: if (baij->keepnonzeropattern) {
2379: for (i = 0; i < is_n; i++) sizes[i] = 1;
2380: bs_max = is_n;
2381: } else {
2382: PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2383: A->nonzerostate++;
2384: }
2386: for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2387: row = rows[j];
2388: PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2389: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2390: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2391: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2392: if (diag != (PetscScalar)0.0) {
2393: if (baij->ilen[row / bs] > 0) {
2394: baij->ilen[row / bs] = 1;
2395: baij->j[baij->i[row / bs]] = row / bs;
2397: PetscCall(PetscArrayzero(aa, count * bs));
2398: }
2399: /* Now insert all the diagonal values for this bs */
2400: for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2401: } else { /* (diag == 0.0) */
2402: baij->ilen[row / bs] = 0;
2403: } /* end (diag == 0.0) */
2404: } else { /* (sizes[i] != bs) */
2405: PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2406: for (k = 0; k < count; k++) {
2407: aa[0] = zero;
2408: aa += bs;
2409: }
2410: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2411: }
2412: }
2414: PetscCall(PetscFree2(rows, sizes));
2415: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2416: PetscFunctionReturn(PETSC_SUCCESS);
2417: }
2419: static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2420: {
2421: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2422: PetscInt i, j, k, count;
2423: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2424: PetscScalar zero = 0.0;
2425: MatScalar *aa;
2426: const PetscScalar *xx;
2427: PetscScalar *bb;
2428: PetscBool *zeroed, vecs = PETSC_FALSE;
2430: PetscFunctionBegin;
2431: /* fix right-hand side if needed */
2432: if (x && b) {
2433: PetscCall(VecGetArrayRead(x, &xx));
2434: PetscCall(VecGetArray(b, &bb));
2435: vecs = PETSC_TRUE;
2436: }
2438: /* zero the columns */
2439: PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2440: for (i = 0; i < is_n; i++) {
2441: PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2442: zeroed[is_idx[i]] = PETSC_TRUE;
2443: }
2444: for (i = 0; i < A->rmap->N; i++) {
2445: if (!zeroed[i]) {
2446: row = i / bs;
2447: for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2448: for (k = 0; k < bs; k++) {
2449: col = bs * baij->j[j] + k;
2450: if (zeroed[col]) {
2451: aa = baij->a + j * bs2 + (i % bs) + bs * k;
2452: if (vecs) bb[i] -= aa[0] * xx[col];
2453: aa[0] = 0.0;
2454: }
2455: }
2456: }
2457: } else if (vecs) bb[i] = diag * xx[i];
2458: }
2459: PetscCall(PetscFree(zeroed));
2460: if (vecs) {
2461: PetscCall(VecRestoreArrayRead(x, &xx));
2462: PetscCall(VecRestoreArray(b, &bb));
2463: }
2465: /* zero the rows */
2466: for (i = 0; i < is_n; i++) {
2467: row = is_idx[i];
2468: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2469: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2470: for (k = 0; k < count; k++) {
2471: aa[0] = zero;
2472: aa += bs;
2473: }
2474: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2475: }
2476: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2477: PetscFunctionReturn(PETSC_SUCCESS);
2478: }
2480: PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2481: {
2482: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2483: PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2484: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2485: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2486: PetscInt ridx, cidx, bs2 = a->bs2;
2487: PetscBool roworiented = a->roworiented;
2488: MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap;
2490: PetscFunctionBegin;
2491: for (k = 0; k < m; k++) { /* loop over added rows */
2492: row = im[k];
2493: brow = row / bs;
2494: if (row < 0) continue;
2495: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2496: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2497: if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2498: rmax = imax[brow];
2499: nrow = ailen[brow];
2500: low = 0;
2501: high = nrow;
2502: for (l = 0; l < n; l++) { /* loop over added columns */
2503: if (in[l] < 0) continue;
2504: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2505: col = in[l];
2506: bcol = col / bs;
2507: ridx = row % bs;
2508: cidx = col % bs;
2509: if (!A->structure_only) {
2510: if (roworiented) {
2511: value = v[l + k * n];
2512: } else {
2513: value = v[k + l * m];
2514: }
2515: }
2516: if (col <= lastcol) low = 0;
2517: else high = nrow;
2518: lastcol = col;
2519: while (high - low > 7) {
2520: t = (low + high) / 2;
2521: if (rp[t] > bcol) high = t;
2522: else low = t;
2523: }
2524: for (i = low; i < high; i++) {
2525: if (rp[i] > bcol) break;
2526: if (rp[i] == bcol) {
2527: bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2528: if (!A->structure_only) {
2529: if (is == ADD_VALUES) *bap += value;
2530: else *bap = value;
2531: }
2532: goto noinsert1;
2533: }
2534: }
2535: if (nonew == 1) goto noinsert1;
2536: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2537: if (A->structure_only) {
2538: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2539: } else {
2540: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2541: }
2542: N = nrow++ - 1;
2543: high++;
2544: /* shift up all the later entries in this row */
2545: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2546: rp[i] = bcol;
2547: if (!A->structure_only) {
2548: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2549: PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2550: ap[bs2 * i + bs * cidx + ridx] = value;
2551: }
2552: a->nz++;
2553: noinsert1:;
2554: low = i;
2555: }
2556: ailen[brow] = nrow;
2557: }
2558: PetscFunctionReturn(PETSC_SUCCESS);
2559: }
2561: static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2562: {
2563: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2564: Mat outA;
2565: PetscBool row_identity, col_identity;
2567: PetscFunctionBegin;
2568: PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2569: PetscCall(ISIdentity(row, &row_identity));
2570: PetscCall(ISIdentity(col, &col_identity));
2571: PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2573: outA = inA;
2574: inA->factortype = MAT_FACTOR_LU;
2575: PetscCall(PetscFree(inA->solvertype));
2576: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2578: PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2580: PetscCall(PetscObjectReference((PetscObject)row));
2581: PetscCall(ISDestroy(&a->row));
2582: a->row = row;
2583: PetscCall(PetscObjectReference((PetscObject)col));
2584: PetscCall(ISDestroy(&a->col));
2585: a->col = col;
2587: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2588: PetscCall(ISDestroy(&a->icol));
2589: PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2591: PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2592: if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2593: PetscCall(MatLUFactorNumeric(outA, inA, info));
2594: PetscFunctionReturn(PETSC_SUCCESS);
2595: }
2597: static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2598: {
2599: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2601: PetscFunctionBegin;
2602: baij->nz = baij->maxnz;
2603: PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2604: PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2605: PetscFunctionReturn(PETSC_SUCCESS);
2606: }
2608: /*@
2609: MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2611: Input Parameters:
2612: + mat - the `MATSEQBAIJ` matrix
2613: - indices - the block column indices
2615: Level: advanced
2617: Notes:
2618: This can be called if you have precomputed the nonzero structure of the
2619: matrix and want to provide it to the matrix object to improve the performance
2620: of the `MatSetValues()` operation.
2622: You MUST have set the correct numbers of nonzeros per row in the call to
2623: `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2625: MUST be called before any calls to `MatSetValues()`
2627: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2628: @*/
2629: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2630: {
2631: PetscFunctionBegin;
2633: PetscAssertPointer(indices, 2);
2634: PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2635: PetscFunctionReturn(PETSC_SUCCESS);
2636: }
2638: static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2639: {
2640: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2641: PetscInt i, j, n, row, bs, *ai, *aj, mbs;
2642: PetscReal atmp;
2643: PetscScalar *x, zero = 0.0;
2644: MatScalar *aa;
2645: PetscInt ncols, brow, krow, kcol;
2647: PetscFunctionBegin;
2648: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2649: bs = A->rmap->bs;
2650: aa = a->a;
2651: ai = a->i;
2652: aj = a->j;
2653: mbs = a->mbs;
2655: PetscCall(VecSet(v, zero));
2656: PetscCall(VecGetArray(v, &x));
2657: PetscCall(VecGetLocalSize(v, &n));
2658: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2659: for (i = 0; i < mbs; i++) {
2660: ncols = ai[1] - ai[0];
2661: ai++;
2662: brow = bs * i;
2663: for (j = 0; j < ncols; j++) {
2664: for (kcol = 0; kcol < bs; kcol++) {
2665: for (krow = 0; krow < bs; krow++) {
2666: atmp = PetscAbsScalar(*aa);
2667: aa++;
2668: row = brow + krow; /* row index */
2669: if (PetscAbsScalar(x[row]) < atmp) {
2670: x[row] = atmp;
2671: if (idx) idx[row] = bs * (*aj) + kcol;
2672: }
2673: }
2674: }
2675: aj++;
2676: }
2677: }
2678: PetscCall(VecRestoreArray(v, &x));
2679: PetscFunctionReturn(PETSC_SUCCESS);
2680: }
2682: static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2683: {
2684: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2685: PetscInt i, j, n, row, bs, *ai, mbs;
2686: PetscReal atmp;
2687: PetscScalar *x, zero = 0.0;
2688: MatScalar *aa;
2689: PetscInt ncols, brow, krow, kcol;
2691: PetscFunctionBegin;
2692: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2693: bs = A->rmap->bs;
2694: aa = a->a;
2695: ai = a->i;
2696: mbs = a->mbs;
2698: PetscCall(VecSet(v, zero));
2699: PetscCall(VecGetArrayWrite(v, &x));
2700: PetscCall(VecGetLocalSize(v, &n));
2701: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2702: for (i = 0; i < mbs; i++) {
2703: ncols = ai[1] - ai[0];
2704: ai++;
2705: brow = bs * i;
2706: for (j = 0; j < ncols; j++) {
2707: for (kcol = 0; kcol < bs; kcol++) {
2708: for (krow = 0; krow < bs; krow++) {
2709: atmp = PetscAbsScalar(*aa);
2710: aa++;
2711: row = brow + krow; /* row index */
2712: x[row] += atmp;
2713: }
2714: }
2715: }
2716: }
2717: PetscCall(VecRestoreArrayWrite(v, &x));
2718: PetscFunctionReturn(PETSC_SUCCESS);
2719: }
2721: static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2722: {
2723: PetscFunctionBegin;
2724: /* If the two matrices have the same copy implementation, use fast copy. */
2725: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2726: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2727: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
2728: PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2730: PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2731: PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2732: PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2733: PetscCall(PetscObjectStateIncrease((PetscObject)B));
2734: } else {
2735: PetscCall(MatCopy_Basic(A, B, str));
2736: }
2737: PetscFunctionReturn(PETSC_SUCCESS);
2738: }
2740: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2741: {
2742: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2744: PetscFunctionBegin;
2745: *array = a->a;
2746: PetscFunctionReturn(PETSC_SUCCESS);
2747: }
2749: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2750: {
2751: PetscFunctionBegin;
2752: *array = NULL;
2753: PetscFunctionReturn(PETSC_SUCCESS);
2754: }
2756: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2757: {
2758: PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2759: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2760: Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2762: PetscFunctionBegin;
2763: /* Set the number of nonzeros in the new matrix */
2764: PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2765: PetscFunctionReturn(PETSC_SUCCESS);
2766: }
2768: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2769: {
2770: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2771: PetscInt bs = Y->rmap->bs, bs2 = bs * bs;
2772: PetscBLASInt one = 1;
2774: PetscFunctionBegin;
2775: if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2776: PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2777: if (e) {
2778: PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2779: if (e) {
2780: PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2781: if (e) str = SAME_NONZERO_PATTERN;
2782: }
2783: }
2784: if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2785: }
2786: if (str == SAME_NONZERO_PATTERN) {
2787: PetscScalar alpha = a;
2788: PetscBLASInt bnz;
2789: PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2790: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2791: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2792: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2793: PetscCall(MatAXPY_Basic(Y, a, X, str));
2794: } else {
2795: Mat B;
2796: PetscInt *nnz;
2797: PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2798: PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2799: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2800: PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2801: PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2802: PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2803: PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2804: PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2805: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2806: PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2807: PetscCall(MatHeaderMerge(Y, &B));
2808: PetscCall(PetscFree(nnz));
2809: }
2810: PetscFunctionReturn(PETSC_SUCCESS);
2811: }
2813: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2814: {
2815: #if PetscDefined(USE_COMPLEX)
2816: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2817: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2818: MatScalar *aa = a->a;
2820: PetscFunctionBegin;
2821: for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2822: PetscFunctionReturn(PETSC_SUCCESS);
2823: #else
2824: (void)A;
2825: return PETSC_SUCCESS;
2826: #endif
2827: }
2829: static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2830: {
2831: #if PetscDefined(USE_COMPLEX)
2832: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2833: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2834: MatScalar *aa = a->a;
2836: PetscFunctionBegin;
2837: for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2838: PetscFunctionReturn(PETSC_SUCCESS);
2839: #else
2840: (void)A;
2841: return PETSC_SUCCESS;
2842: #endif
2843: }
2845: static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2846: {
2847: #if PetscDefined(USE_COMPLEX)
2848: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2849: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2850: MatScalar *aa = a->a;
2852: PetscFunctionBegin;
2853: for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2854: PetscFunctionReturn(PETSC_SUCCESS);
2855: #else
2856: (void)A;
2857: return PETSC_SUCCESS;
2858: #endif
2859: }
2861: /*
2862: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2863: */
2864: static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2865: {
2866: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2867: PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2868: PetscInt nz = a->i[m], row, *jj, mr, col;
2870: PetscFunctionBegin;
2871: *nn = n;
2872: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2873: PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2874: PetscCall(PetscCalloc1(n, &collengths));
2875: PetscCall(PetscMalloc1(n + 1, &cia));
2876: PetscCall(PetscMalloc1(nz, &cja));
2877: jj = a->j;
2878: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2879: cia[0] = oshift;
2880: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2881: PetscCall(PetscArrayzero(collengths, n));
2882: jj = a->j;
2883: for (row = 0; row < m; row++) {
2884: mr = a->i[row + 1] - a->i[row];
2885: for (i = 0; i < mr; i++) {
2886: col = *jj++;
2888: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2889: }
2890: }
2891: PetscCall(PetscFree(collengths));
2892: *ia = cia;
2893: *ja = cja;
2894: PetscFunctionReturn(PETSC_SUCCESS);
2895: }
2897: static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2898: {
2899: PetscFunctionBegin;
2900: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2901: PetscCall(PetscFree(*ia));
2902: PetscCall(PetscFree(*ja));
2903: PetscFunctionReturn(PETSC_SUCCESS);
2904: }
2906: /*
2907: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2908: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2909: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2910: */
2911: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2912: {
2913: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2914: PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2915: PetscInt nz = a->i[m], row, *jj, mr, col;
2916: PetscInt *cspidx;
2918: PetscFunctionBegin;
2919: *nn = n;
2920: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2922: PetscCall(PetscCalloc1(n, &collengths));
2923: PetscCall(PetscMalloc1(n + 1, &cia));
2924: PetscCall(PetscMalloc1(nz, &cja));
2925: PetscCall(PetscMalloc1(nz, &cspidx));
2926: jj = a->j;
2927: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2928: cia[0] = oshift;
2929: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2930: PetscCall(PetscArrayzero(collengths, n));
2931: jj = a->j;
2932: for (row = 0; row < m; row++) {
2933: mr = a->i[row + 1] - a->i[row];
2934: for (i = 0; i < mr; i++) {
2935: col = *jj++;
2936: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2937: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2938: }
2939: }
2940: PetscCall(PetscFree(collengths));
2941: *ia = cia;
2942: *ja = cja;
2943: *spidx = cspidx;
2944: PetscFunctionReturn(PETSC_SUCCESS);
2945: }
2947: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2948: {
2949: PetscFunctionBegin;
2950: PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2951: PetscCall(PetscFree(*spidx));
2952: PetscFunctionReturn(PETSC_SUCCESS);
2953: }
2955: static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2956: {
2957: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2959: PetscFunctionBegin;
2960: if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2961: PetscCall(MatShift_Basic(Y, a));
2962: PetscFunctionReturn(PETSC_SUCCESS);
2963: }
2965: PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2966: {
2967: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2968: PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2969: PetscInt m = A->rmap->N, *ailen = a->ilen;
2970: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2971: MatScalar *aa = a->a, *ap;
2972: PetscBool zero;
2974: PetscFunctionBegin;
2975: PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2976: if (m) rmax = ailen[0];
2977: for (i = 1; i <= mbs; i++) {
2978: for (k = ai[i - 1]; k < ai[i]; k++) {
2979: zero = PETSC_TRUE;
2980: ap = aa + bs2 * k;
2981: for (j = 0; j < bs2 && zero; j++) {
2982: if (ap[j] != 0.0) zero = PETSC_FALSE;
2983: }
2984: if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2985: else {
2986: if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2987: aj[k - fshift] = aj[k];
2988: PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2989: }
2990: }
2991: ai[i - 1] -= fshift_prev;
2992: fshift_prev = fshift;
2993: ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2994: a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2995: rmax = PetscMax(rmax, ailen[i - 1]);
2996: }
2997: if (fshift) {
2998: if (mbs) {
2999: ai[mbs] -= fshift;
3000: a->nz = ai[mbs];
3001: }
3002: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
3003: A->nonzerostate++;
3004: A->info.nz_unneeded += (PetscReal)fshift;
3005: a->rmax = rmax;
3006: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
3007: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
3008: }
3009: PetscFunctionReturn(PETSC_SUCCESS);
3010: }
3012: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
3013: MatGetRow_SeqBAIJ,
3014: MatRestoreRow_SeqBAIJ,
3015: MatMult_SeqBAIJ_N,
3016: /* 4*/ MatMultAdd_SeqBAIJ_N,
3017: MatMultTranspose_SeqBAIJ,
3018: MatMultTransposeAdd_SeqBAIJ,
3019: NULL,
3020: NULL,
3021: NULL,
3022: /* 10*/ NULL,
3023: MatLUFactor_SeqBAIJ,
3024: NULL,
3025: NULL,
3026: MatTranspose_SeqBAIJ,
3027: /* 15*/ MatGetInfo_SeqBAIJ,
3028: MatEqual_SeqBAIJ,
3029: MatGetDiagonal_SeqBAIJ,
3030: MatDiagonalScale_SeqBAIJ,
3031: MatNorm_SeqBAIJ,
3032: /* 20*/ NULL,
3033: MatAssemblyEnd_SeqBAIJ,
3034: MatSetOption_SeqBAIJ,
3035: MatZeroEntries_SeqBAIJ,
3036: /* 24*/ MatZeroRows_SeqBAIJ,
3037: NULL,
3038: NULL,
3039: NULL,
3040: NULL,
3041: /* 29*/ MatSetUp_Seq_Hash,
3042: NULL,
3043: NULL,
3044: NULL,
3045: NULL,
3046: /* 34*/ MatDuplicate_SeqBAIJ,
3047: NULL,
3048: NULL,
3049: MatILUFactor_SeqBAIJ,
3050: NULL,
3051: /* 39*/ MatAXPY_SeqBAIJ,
3052: MatCreateSubMatrices_SeqBAIJ,
3053: MatIncreaseOverlap_SeqBAIJ,
3054: MatGetValues_SeqBAIJ,
3055: MatCopy_SeqBAIJ,
3056: /* 44*/ NULL,
3057: MatScale_SeqBAIJ,
3058: MatShift_SeqBAIJ,
3059: NULL,
3060: MatZeroRowsColumns_SeqBAIJ,
3061: /* 49*/ NULL,
3062: MatGetRowIJ_SeqBAIJ,
3063: MatRestoreRowIJ_SeqBAIJ,
3064: MatGetColumnIJ_SeqBAIJ,
3065: MatRestoreColumnIJ_SeqBAIJ,
3066: /* 54*/ MatFDColoringCreate_SeqXAIJ,
3067: NULL,
3068: NULL,
3069: NULL,
3070: MatSetValuesBlocked_SeqBAIJ,
3071: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3072: MatDestroy_SeqBAIJ,
3073: MatView_SeqBAIJ,
3074: NULL,
3075: NULL,
3076: /* 64*/ NULL,
3077: NULL,
3078: NULL,
3079: NULL,
3080: NULL,
3081: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3082: NULL,
3083: MatConvert_Basic,
3084: NULL,
3085: NULL,
3086: /* 74*/ NULL,
3087: MatFDColoringApply_BAIJ,
3088: NULL,
3089: NULL,
3090: NULL,
3091: /* 79*/ NULL,
3092: NULL,
3093: NULL,
3094: NULL,
3095: MatLoad_SeqBAIJ,
3096: /* 84*/ NULL,
3097: NULL,
3098: NULL,
3099: NULL,
3100: NULL,
3101: /* 89*/ NULL,
3102: NULL,
3103: NULL,
3104: NULL,
3105: NULL,
3106: /* 94*/ NULL,
3107: NULL,
3108: NULL,
3109: NULL,
3110: NULL,
3111: /* 99*/ NULL,
3112: NULL,
3113: NULL,
3114: MatConjugate_SeqBAIJ,
3115: NULL,
3116: /*104*/ NULL,
3117: MatRealPart_SeqBAIJ,
3118: MatImaginaryPart_SeqBAIJ,
3119: NULL,
3120: NULL,
3121: /*109*/ NULL,
3122: NULL,
3123: NULL,
3124: NULL,
3125: MatMissingDiagonal_SeqBAIJ,
3126: /*114*/ NULL,
3127: NULL,
3128: NULL,
3129: NULL,
3130: NULL,
3131: /*119*/ NULL,
3132: NULL,
3133: MatMultHermitianTranspose_SeqBAIJ,
3134: MatMultHermitianTransposeAdd_SeqBAIJ,
3135: NULL,
3136: /*124*/ NULL,
3137: MatGetColumnReductions_SeqBAIJ,
3138: MatInvertBlockDiagonal_SeqBAIJ,
3139: NULL,
3140: NULL,
3141: /*129*/ NULL,
3142: NULL,
3143: NULL,
3144: NULL,
3145: NULL,
3146: /*134*/ NULL,
3147: NULL,
3148: NULL,
3149: NULL,
3150: NULL,
3151: /*139*/ MatSetBlockSizes_Default,
3152: NULL,
3153: NULL,
3154: MatFDColoringSetUp_SeqXAIJ,
3155: NULL,
3156: /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3157: MatDestroySubMatrices_SeqBAIJ,
3158: NULL,
3159: NULL,
3160: NULL,
3161: NULL,
3162: /*150*/ NULL,
3163: MatEliminateZeros_SeqBAIJ,
3164: MatGetRowSumAbs_SeqBAIJ,
3165: NULL,
3166: NULL,
3167: /*155*/ NULL,
3168: MatCopyHashToXAIJ_Seq_Hash};
3170: static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3171: {
3172: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3173: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3175: PetscFunctionBegin;
3176: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3178: /* allocate space for values if not already there */
3179: if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3181: /* copy values over */
3182: PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3183: PetscFunctionReturn(PETSC_SUCCESS);
3184: }
3186: static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3187: {
3188: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3189: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3191: PetscFunctionBegin;
3192: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3193: PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3195: /* copy values over */
3196: PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3197: PetscFunctionReturn(PETSC_SUCCESS);
3198: }
3200: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3201: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3203: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3204: {
3205: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3206: PetscInt i, mbs, nbs, bs2;
3207: PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3209: PetscFunctionBegin;
3210: if (B->hash_active) {
3211: PetscInt bs;
3212: B->ops[0] = b->cops;
3213: PetscCall(PetscHMapIJVDestroy(&b->ht));
3214: PetscCall(MatGetBlockSize(B, &bs));
3215: if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3216: PetscCall(PetscFree(b->dnz));
3217: PetscCall(PetscFree(b->bdnz));
3218: B->hash_active = PETSC_FALSE;
3219: }
3220: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3221: if (nz == MAT_SKIP_ALLOCATION) {
3222: skipallocation = PETSC_TRUE;
3223: nz = 0;
3224: }
3226: PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3227: PetscCall(PetscLayoutSetUp(B->rmap));
3228: PetscCall(PetscLayoutSetUp(B->cmap));
3229: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3231: B->preallocated = PETSC_TRUE;
3233: mbs = B->rmap->n / bs;
3234: nbs = B->cmap->n / bs;
3235: bs2 = bs * bs;
3237: PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3239: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3240: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3241: if (nnz) {
3242: for (i = 0; i < mbs; i++) {
3243: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3244: PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3245: }
3246: }
3248: PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3249: PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3250: PetscOptionsEnd();
3252: if (!flg) {
3253: switch (bs) {
3254: case 1:
3255: B->ops->mult = MatMult_SeqBAIJ_1;
3256: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3257: break;
3258: case 2:
3259: B->ops->mult = MatMult_SeqBAIJ_2;
3260: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3261: break;
3262: case 3:
3263: B->ops->mult = MatMult_SeqBAIJ_3;
3264: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3265: break;
3266: case 4:
3267: B->ops->mult = MatMult_SeqBAIJ_4;
3268: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3269: break;
3270: case 5:
3271: B->ops->mult = MatMult_SeqBAIJ_5;
3272: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3273: break;
3274: case 6:
3275: B->ops->mult = MatMult_SeqBAIJ_6;
3276: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3277: break;
3278: case 7:
3279: B->ops->mult = MatMult_SeqBAIJ_7;
3280: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3281: break;
3282: case 9: {
3283: PetscInt version = 1;
3284: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3285: switch (version) {
3286: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3287: case 1:
3288: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
3289: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3290: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3291: break;
3292: #endif
3293: default:
3294: B->ops->mult = MatMult_SeqBAIJ_N;
3295: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3296: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3297: break;
3298: }
3299: break;
3300: }
3301: case 11:
3302: B->ops->mult = MatMult_SeqBAIJ_11;
3303: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3304: break;
3305: case 12: {
3306: PetscInt version = 1;
3307: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3308: switch (version) {
3309: case 1:
3310: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
3311: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3312: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3313: break;
3314: case 2:
3315: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
3316: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3317: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3318: break;
3319: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3320: case 3:
3321: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
3322: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3323: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3324: break;
3325: #endif
3326: default:
3327: B->ops->mult = MatMult_SeqBAIJ_N;
3328: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3329: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3330: break;
3331: }
3332: break;
3333: }
3334: case 15: {
3335: PetscInt version = 1;
3336: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3337: switch (version) {
3338: case 1:
3339: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3340: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3341: break;
3342: case 2:
3343: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3344: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3345: break;
3346: case 3:
3347: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3348: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3349: break;
3350: case 4:
3351: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3352: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3353: break;
3354: default:
3355: B->ops->mult = MatMult_SeqBAIJ_N;
3356: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3357: break;
3358: }
3359: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3360: break;
3361: }
3362: default:
3363: B->ops->mult = MatMult_SeqBAIJ_N;
3364: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3365: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3366: break;
3367: }
3368: }
3369: B->ops->sor = MatSOR_SeqBAIJ;
3370: b->mbs = mbs;
3371: b->nbs = nbs;
3372: if (!skipallocation) {
3373: if (!b->imax) {
3374: PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3376: b->free_imax_ilen = PETSC_TRUE;
3377: }
3378: /* b->ilen will count nonzeros in each block row so far. */
3379: for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3380: if (!nnz) {
3381: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3382: else if (nz < 0) nz = 1;
3383: nz = PetscMin(nz, nbs);
3384: for (i = 0; i < mbs; i++) b->imax[i] = nz;
3385: PetscCall(PetscIntMultError(nz, mbs, &nz));
3386: } else {
3387: PetscInt64 nz64 = 0;
3388: for (i = 0; i < mbs; i++) {
3389: b->imax[i] = nnz[i];
3390: nz64 += nnz[i];
3391: }
3392: PetscCall(PetscIntCast(nz64, &nz));
3393: }
3395: /* allocate the matrix space */
3396: PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3397: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3398: PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3399: if (B->structure_only) {
3400: b->free_a = PETSC_FALSE;
3401: } else {
3402: PetscInt nzbs2 = 0;
3403: PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3404: PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3405: b->free_a = PETSC_TRUE;
3406: PetscCall(PetscArrayzero(b->a, nz * bs2));
3407: }
3408: b->free_ij = PETSC_TRUE;
3409: PetscCall(PetscArrayzero(b->j, nz));
3411: b->i[0] = 0;
3412: for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3413: } else {
3414: b->free_a = PETSC_FALSE;
3415: b->free_ij = PETSC_FALSE;
3416: }
3418: b->bs2 = bs2;
3419: b->mbs = mbs;
3420: b->nz = 0;
3421: b->maxnz = nz;
3422: B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3423: B->was_assembled = PETSC_FALSE;
3424: B->assembled = PETSC_FALSE;
3425: if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3426: PetscFunctionReturn(PETSC_SUCCESS);
3427: }
3429: static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3430: {
3431: PetscInt i, m, nz, nz_max = 0, *nnz;
3432: PetscScalar *values = NULL;
3433: PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3435: PetscFunctionBegin;
3436: PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3437: PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3438: PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3439: PetscCall(PetscLayoutSetUp(B->rmap));
3440: PetscCall(PetscLayoutSetUp(B->cmap));
3441: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3442: m = B->rmap->n / bs;
3444: PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3445: PetscCall(PetscMalloc1(m + 1, &nnz));
3446: for (i = 0; i < m; i++) {
3447: nz = ii[i + 1] - ii[i];
3448: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3449: nz_max = PetscMax(nz_max, nz);
3450: nnz[i] = nz;
3451: }
3452: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3453: PetscCall(PetscFree(nnz));
3455: values = (PetscScalar *)V;
3456: if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3457: for (i = 0; i < m; i++) {
3458: PetscInt ncols = ii[i + 1] - ii[i];
3459: const PetscInt *icols = jj + ii[i];
3460: if (bs == 1 || !roworiented) {
3461: const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3462: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3463: } else {
3464: PetscInt j;
3465: for (j = 0; j < ncols; j++) {
3466: const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3467: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3468: }
3469: }
3470: }
3471: if (!V) PetscCall(PetscFree(values));
3472: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3473: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3474: PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3475: PetscFunctionReturn(PETSC_SUCCESS);
3476: }
3478: /*@C
3479: MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3481: Not Collective
3483: Input Parameter:
3484: . A - a `MATSEQBAIJ` matrix
3486: Output Parameter:
3487: . array - pointer to the data
3489: Level: intermediate
3491: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3492: @*/
3493: PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3494: {
3495: PetscFunctionBegin;
3496: PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3497: PetscFunctionReturn(PETSC_SUCCESS);
3498: }
3500: /*@C
3501: MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3503: Not Collective
3505: Input Parameters:
3506: + A - a `MATSEQBAIJ` matrix
3507: - array - pointer to the data
3509: Level: intermediate
3511: .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3512: @*/
3513: PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3514: {
3515: PetscFunctionBegin;
3516: PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3517: PetscFunctionReturn(PETSC_SUCCESS);
3518: }
3520: /*MC
3521: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3522: block sparse compressed row format.
3524: Options Database Keys:
3525: + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3526: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3528: Level: beginner
3530: Notes:
3531: `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3532: space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3534: Run with `-info` to see what version of the matrix-vector product is being used
3536: .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3537: M*/
3539: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3541: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3542: {
3543: PetscMPIInt size;
3544: Mat_SeqBAIJ *b;
3546: PetscFunctionBegin;
3547: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3548: PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3550: PetscCall(PetscNew(&b));
3551: B->data = (void *)b;
3552: B->ops[0] = MatOps_Values;
3554: b->row = NULL;
3555: b->col = NULL;
3556: b->icol = NULL;
3557: b->reallocs = 0;
3558: b->saved_values = NULL;
3560: b->roworiented = PETSC_TRUE;
3561: b->nonew = 0;
3562: b->diag = NULL;
3563: B->spptr = NULL;
3564: B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2;
3565: b->keepnonzeropattern = PETSC_FALSE;
3567: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3568: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3569: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3570: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3571: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3572: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3573: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3574: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3575: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3576: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3577: #if defined(PETSC_HAVE_HYPRE)
3578: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3579: #endif
3580: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3581: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3582: PetscFunctionReturn(PETSC_SUCCESS);
3583: }
3585: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3586: {
3587: Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3588: PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3590: PetscFunctionBegin;
3591: PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3592: PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3594: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3595: c->imax = a->imax;
3596: c->ilen = a->ilen;
3597: c->free_imax_ilen = PETSC_FALSE;
3598: } else {
3599: PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3600: for (i = 0; i < mbs; i++) {
3601: c->imax[i] = a->imax[i];
3602: c->ilen[i] = a->ilen[i];
3603: }
3604: c->free_imax_ilen = PETSC_TRUE;
3605: }
3607: /* allocate the matrix space */
3608: if (mallocmatspace) {
3609: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3610: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3611: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3612: c->free_a = PETSC_TRUE;
3613: c->i = a->i;
3614: c->j = a->j;
3615: c->free_ij = PETSC_FALSE;
3616: c->parent = A;
3617: C->preallocated = PETSC_TRUE;
3618: C->assembled = PETSC_TRUE;
3620: PetscCall(PetscObjectReference((PetscObject)A));
3621: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3622: PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3623: } else {
3624: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3625: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3626: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3627: c->free_a = PETSC_TRUE;
3628: c->free_ij = PETSC_TRUE;
3630: PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3631: if (mbs > 0) {
3632: PetscCall(PetscArraycpy(c->j, a->j, nz));
3633: if (cpvalues == MAT_COPY_VALUES) {
3634: PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3635: } else {
3636: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3637: }
3638: }
3639: C->preallocated = PETSC_TRUE;
3640: C->assembled = PETSC_TRUE;
3641: }
3642: }
3644: c->roworiented = a->roworiented;
3645: c->nonew = a->nonew;
3647: PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3648: PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3650: c->bs2 = a->bs2;
3651: c->mbs = a->mbs;
3652: c->nbs = a->nbs;
3654: if (a->diag) {
3655: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3656: c->diag = a->diag;
3657: c->free_diag = PETSC_FALSE;
3658: } else {
3659: PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3660: for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3661: c->free_diag = PETSC_TRUE;
3662: }
3663: } else c->diag = NULL;
3665: c->nz = a->nz;
3666: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3667: c->solve_work = NULL;
3668: c->mult_work = NULL;
3669: c->sor_workt = NULL;
3670: c->sor_work = NULL;
3672: c->compressedrow.use = a->compressedrow.use;
3673: c->compressedrow.nrows = a->compressedrow.nrows;
3674: if (a->compressedrow.use) {
3675: i = a->compressedrow.nrows;
3676: PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3677: PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3678: PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3679: } else {
3680: c->compressedrow.use = PETSC_FALSE;
3681: c->compressedrow.i = NULL;
3682: c->compressedrow.rindex = NULL;
3683: }
3684: c->nonzerorowcnt = a->nonzerorowcnt;
3685: C->nonzerostate = A->nonzerostate;
3687: PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3688: PetscFunctionReturn(PETSC_SUCCESS);
3689: }
3691: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3692: {
3693: PetscFunctionBegin;
3694: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3695: PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3696: PetscCall(MatSetType(*B, MATSEQBAIJ));
3697: PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3698: PetscFunctionReturn(PETSC_SUCCESS);
3699: }
3701: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3702: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3703: {
3704: PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3705: PetscInt *rowidxs, *colidxs;
3706: PetscScalar *matvals;
3708: PetscFunctionBegin;
3709: PetscCall(PetscViewerSetUp(viewer));
3711: /* read matrix header */
3712: PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3713: PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3714: M = header[1];
3715: N = header[2];
3716: nz = header[3];
3717: PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3718: PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3719: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3721: /* set block sizes from the viewer's .info file */
3722: PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3723: /* set local and global sizes if not set already */
3724: if (mat->rmap->n < 0) mat->rmap->n = M;
3725: if (mat->cmap->n < 0) mat->cmap->n = N;
3726: if (mat->rmap->N < 0) mat->rmap->N = M;
3727: if (mat->cmap->N < 0) mat->cmap->N = N;
3728: PetscCall(PetscLayoutSetUp(mat->rmap));
3729: PetscCall(PetscLayoutSetUp(mat->cmap));
3731: /* check if the matrix sizes are correct */
3732: PetscCall(MatGetSize(mat, &rows, &cols));
3733: PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3734: PetscCall(MatGetBlockSize(mat, &bs));
3735: PetscCall(MatGetLocalSize(mat, &m, &n));
3736: mbs = m / bs;
3737: nbs = n / bs;
3739: /* read in row lengths, column indices and nonzero values */
3740: PetscCall(PetscMalloc1(m + 1, &rowidxs));
3741: PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3742: rowidxs[0] = 0;
3743: for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3744: sum = rowidxs[m];
3745: PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3747: /* read in column indices and nonzero values */
3748: PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3749: PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3750: PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3752: { /* preallocate matrix storage */
3753: PetscBT bt; /* helper bit set to count nonzeros */
3754: PetscInt *nnz;
3755: PetscBool sbaij;
3757: PetscCall(PetscBTCreate(nbs, &bt));
3758: PetscCall(PetscCalloc1(mbs, &nnz));
3759: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3760: for (i = 0; i < mbs; i++) {
3761: PetscCall(PetscBTMemzero(nbs, bt));
3762: for (k = 0; k < bs; k++) {
3763: PetscInt row = bs * i + k;
3764: for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3765: PetscInt col = colidxs[j];
3766: if (!sbaij || col >= row)
3767: if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3768: }
3769: }
3770: }
3771: PetscCall(PetscBTDestroy(&bt));
3772: PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3773: PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3774: PetscCall(PetscFree(nnz));
3775: }
3777: /* store matrix values */
3778: for (i = 0; i < m; i++) {
3779: PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3780: PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3781: }
3783: PetscCall(PetscFree(rowidxs));
3784: PetscCall(PetscFree2(colidxs, matvals));
3785: PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3786: PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3787: PetscFunctionReturn(PETSC_SUCCESS);
3788: }
3790: PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3791: {
3792: PetscBool isbinary;
3794: PetscFunctionBegin;
3795: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3796: PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3797: PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3798: PetscFunctionReturn(PETSC_SUCCESS);
3799: }
3801: /*@
3802: MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3803: compressed row) format. For good matrix assembly performance the
3804: user should preallocate the matrix storage by setting the parameter `nz`
3805: (or the array `nnz`).
3807: Collective
3809: Input Parameters:
3810: + comm - MPI communicator, set to `PETSC_COMM_SELF`
3811: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3812: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3813: . m - number of rows
3814: . n - number of columns
3815: . nz - number of nonzero blocks per block row (same for all rows)
3816: - nnz - array containing the number of nonzero blocks in the various block rows
3817: (possibly different for each block row) or `NULL`
3819: Output Parameter:
3820: . A - the matrix
3822: Options Database Keys:
3823: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3824: - -mat_block_size - size of the blocks to use
3826: Level: intermediate
3828: Notes:
3829: It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3830: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3831: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3833: The number of rows and columns must be divisible by blocksize.
3835: If the `nnz` parameter is given then the `nz` parameter is ignored
3837: A nonzero block is any block that as 1 or more nonzeros in it
3839: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3840: storage. That is, the stored row and column indices can begin at
3841: either one (as in Fortran) or zero.
3843: Specify the preallocated storage with either `nz` or `nnz` (not both).
3844: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3845: allocation. See [Sparse Matrices](sec_matsparse) for details.
3846: matrices.
3848: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3849: @*/
3850: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3851: {
3852: PetscFunctionBegin;
3853: PetscCall(MatCreate(comm, A));
3854: PetscCall(MatSetSizes(*A, m, n, m, n));
3855: PetscCall(MatSetType(*A, MATSEQBAIJ));
3856: PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3857: PetscFunctionReturn(PETSC_SUCCESS);
3858: }
3860: /*@
3861: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3862: per row in the matrix. For good matrix assembly performance the
3863: user should preallocate the matrix storage by setting the parameter `nz`
3864: (or the array `nnz`).
3866: Collective
3868: Input Parameters:
3869: + B - the matrix
3870: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3871: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3872: . nz - number of block nonzeros per block row (same for all rows)
3873: - nnz - array containing the number of block nonzeros in the various block rows
3874: (possibly different for each block row) or `NULL`
3876: Options Database Keys:
3877: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3878: - -mat_block_size - size of the blocks to use
3880: Level: intermediate
3882: Notes:
3883: If the `nnz` parameter is given then the `nz` parameter is ignored
3885: You can call `MatGetInfo()` to get information on how effective the preallocation was;
3886: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3887: You can also run with the option `-info` and look for messages with the string
3888: malloc in them to see if additional memory allocation was needed.
3890: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3891: storage. That is, the stored row and column indices can begin at
3892: either one (as in Fortran) or zero.
3894: Specify the preallocated storage with either `nz` or `nnz` (not both).
3895: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3896: allocation. See [Sparse Matrices](sec_matsparse) for details.
3898: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3899: @*/
3900: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3901: {
3902: PetscFunctionBegin;
3906: PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3907: PetscFunctionReturn(PETSC_SUCCESS);
3908: }
3910: /*@C
3911: MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3913: Collective
3915: Input Parameters:
3916: + B - the matrix
3917: . bs - the blocksize
3918: . i - the indices into `j` for the start of each local row (indices start with zero)
3919: . j - the column indices for each local row (indices start with zero) these must be sorted for each row
3920: - v - optional values in the matrix, use `NULL` if not provided
3922: Level: advanced
3924: Notes:
3925: The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3927: The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
3928: may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3929: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3930: `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3931: block column and the second index is over columns within a block.
3933: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3935: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3936: @*/
3937: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3938: {
3939: PetscFunctionBegin;
3943: PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3944: PetscFunctionReturn(PETSC_SUCCESS);
3945: }
3947: /*@
3948: MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3950: Collective
3952: Input Parameters:
3953: + comm - must be an MPI communicator of size 1
3954: . bs - size of block
3955: . m - number of rows
3956: . n - number of columns
3957: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3958: . j - column indices
3959: - a - matrix values
3961: Output Parameter:
3962: . mat - the matrix
3964: Level: advanced
3966: Notes:
3967: The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3968: once the matrix is destroyed
3970: You cannot set new nonzero locations into this matrix, that will generate an error.
3972: The `i` and `j` indices are 0 based
3974: When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3976: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3977: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3978: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3979: with column-major ordering within blocks.
3981: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3982: @*/
3983: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3984: {
3985: Mat_SeqBAIJ *baij;
3987: PetscFunctionBegin;
3988: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3989: if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3991: PetscCall(MatCreate(comm, mat));
3992: PetscCall(MatSetSizes(*mat, m, n, m, n));
3993: PetscCall(MatSetType(*mat, MATSEQBAIJ));
3994: PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3995: baij = (Mat_SeqBAIJ *)(*mat)->data;
3996: PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3998: baij->i = i;
3999: baij->j = j;
4000: baij->a = a;
4002: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
4003: baij->free_a = PETSC_FALSE;
4004: baij->free_ij = PETSC_FALSE;
4005: baij->free_imax_ilen = PETSC_TRUE;
4007: for (PetscInt ii = 0; ii < m; ii++) {
4008: const PetscInt row_len = i[ii + 1] - i[ii];
4010: baij->ilen[ii] = baij->imax[ii] = row_len;
4011: PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
4012: }
4013: if (PetscDefined(USE_DEBUG)) {
4014: for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
4015: PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4016: PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4017: }
4018: }
4020: PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
4021: PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
4022: PetscFunctionReturn(PETSC_SUCCESS);
4023: }
4025: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4026: {
4027: PetscFunctionBegin;
4028: PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
4029: PetscFunctionReturn(PETSC_SUCCESS);
4030: }