/*
   blockmat.c - This provides a matrix that consists of Mats
*/

#include <petsc/private/matimpl.h>
#include <../src/mat/impls/baij/seq/baij.h>
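
/*
   The Mat_BlockMat structure below reuses the sequential AIJ/BAIJ CSR headers, but the
   "values" array a->a holds a Mat for each stored block instead of a scalar. Roughly,
   as used in this file:

     a->i[ib] .. a->i[ib+1]-1   range of stored blocks in block row ib
     a->j[k]                    block column of the k-th stored block
     a->a[k]                    the bs x bs sequential matrix for that block
     a->diag[ib]                index of the diagonal block of block row ib
     diags                      factored copies of the diagonal blocks, built lazily by the SOR routines
     left, right, middle        bs-long work vectors pointed (via VecPlaceArray()) at pieces of the
                                input/output arrays or used to accumulate local products
     workb                      working copy of the right-hand side used by the symmetric SOR
*/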

typedef struct {
  SEQAIJHEADER(Mat);
  SEQBAIJHEADER;
  Mat *diags;

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
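
/*
   Block SOR (Gauss-Seidel, omega = 1) for the case where only the upper triangular
   blocks of a symmetric matrix are stored. On first use the diagonal blocks are
   Cholesky factored and kept in a->diags. Because the lower triangle is not stored,
   a working copy of the right-hand side (a->workb) is updated as each block row is
   solved so that later block rows see the correct contributions.
*/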

static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
  PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb, &a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  /* copy right-hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb, a->workb));
  PetscCall(VecGetArrayRead(a->workb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));

        /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
        for (j = 0; j < n; j++) {
          PetscCall(MatMultTranspose(v[j], right, left));
          PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
          PetscCall(VecAXPY(middle, -1.0, left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(a->workb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
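
/*
   Block SOR (Gauss-Seidel, omega = 1) for the general (nonsymmetric storage) case.
   On first use the diagonal blocks are LU factored and kept in a->diags; each sweep
   then subtracts the off-diagonal block contributions from the right-hand side and
   solves with the factored diagonal block of that block row.
*/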

static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
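
/*
   Insert scalar values: each global (row, col) index is mapped to a block (brow, bcol)
   and an offset (ridx, cidx) within that block. The block row is searched for the block
   column (a short binary search followed by a linear scan); if the block Mat does not
   exist yet it is created as a bs x bs SeqAIJ matrix, and the value is then passed on
   with MatSetValues() on that block. Inserting at a block position that is not already
   part of the nonzero block structure is not supported (see the disabled #if 0 code).
*/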

static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
  PetscInt     *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow];
    ap   = aa + ai[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs;
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;
      }
      if (nonew == 1) goto noinsert1;
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
#if 0
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
#endif
    noinsert1:;
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
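
/*
   Load a matrix from a binary viewer: the file is first read into a temporary SeqAIJ
   matrix, then two passes over its CSR structure (1) count the nonzero blocks in each
   block row so the BlockMat can be preallocated and (2) create each bs x bs block with
   the right per-row preallocation, after which the scalar entries are copied over row
   by row with MatSetValues(). When -matload_symmetric is given only blocks on or above
   the block diagonal are kept.
*/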

static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  Mat                tmpA;
  PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool          flg = PETSC_FALSE, notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
  PetscCall(MatSetType(tmpA, MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA, viewer));

  PetscCall(MatGetLocalSize(tmpA, &m, &n));
  PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
  PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ *)tmpA->data;
  mbs = m / bs;
  PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
  PetscCall(PetscArrayzero(lens, mbs));

  for (i = 0; i < mbs; i++) {
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j = 0; j < bs; j++) {
        while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
  if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
  amat = (Mat_BlockMat *)newmat->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs, &llens));
  for (i = 0; i < mbs; i++) { /* loops for block rows */
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = 1000000000;
    for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
    }

    while (PETSC_TRUE) { /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscCall(PetscArrayzero(llens, bs));
      for (j = 0; j < bs; j++) {                              /* loop over rows in block */
        while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens, ii, ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
    PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
  }
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
{
  Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
  const char       *name;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetName((PetscObject)A, &name));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_BlockMat(Mat mat)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&bmat->right));
  PetscCall(VecDestroy(&bmat->left));
  PetscCall(VecDestroy(&bmat->middle));
  PetscCall(VecDestroy(&bmat->workb));
  if (bmat->diags) {
    for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
  }
  if (bmat->a) {
    for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
  }
  PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscFree(mat->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
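
/*
   y = A*x as a CSR sweep over blocks: for each block row the dummy vectors bmat->left
   and bmat->right are pointed (VecPlaceArray()) at the relevant bs-long pieces of the
   y and x arrays, so each stored block can be applied with MatMultAdd() without
   copying any data.
*/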

static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    n = ii[i + 1] - jrow;
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
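
/*
   y = A*x when only the upper triangular blocks are stored: each stored off-diagonal
   block A_ij (j > i) contributes A_ij * x_j to y_i and also A_ij^T * x_i to y_j (the
   MatMultTransposeAdd() below), so the full symmetric product is obtained from the
   upper triangle alone.
*/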

static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
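
/*
   The following three operations are currently placeholders: they are registered in the
   function table below but perform no computation and simply return success.
*/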

static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Adds diagonal pointers to sparse matrix structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
  for (i = 0; i < mbs; i++) {
    a->diag[i] = a->i[i + 1];
    for (j = a->i[i]; j < a->i[i + 1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
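
/*
   Extract a scalar (point) submatrix by picking one entry out of every stored block.
   The row and column index sets must be identical and strided with step equal to the
   block size; the (first, first) entry of each block is copied into a SeqAIJ (or
   SeqSBAIJ for symmetric storage) matrix with the same block-sparsity pattern. This
   appears to assume the stride offset lies within the first block.
*/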

static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
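
/*
   Final assembly: compress out the unused preallocated slots in each block row (the
   same squeeze done for SeqAIJ matrices), update the row pointers and nonzero-block
   count, assemble every individual block, and record the diagonal block locations.
*/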

static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it */
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
{
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /* 4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};

/*@C
  MatBlockMatSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameter `nz`
  (or the array `nnz`). By setting these parameters accurately, performance
  during matrix assembly can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B   - The matrix
. bs  - size of each block in matrix
. nz  - number of nonzeros per block row (same for all rows)
- nnz - array containing the number of nonzeros in the various block rows
        (possibly different for each row) or `NULL`

  Level: intermediate

  Notes:
  If `nnz` is given then `nz` is ignored.

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation. Note that the current `MATBLOCKMAT` implementation requires `nnz` to be
  provided; preallocation from `nz` alone is not yet supported.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
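
/*
   Illustrative sketch: creating a MATBLOCKMAT "by hand" and preallocating it block row
   by block row. This mirrors the sequence of calls that MatCreateBlockMat() performs at
   the bottom of this file; the block size, number of block rows, and nnz[] counts used
   here are made up for illustration only.

     Mat      A;
     PetscInt bs = 2, mbs = 3;       // 3 block rows of 2 x 2 blocks (illustrative)
     PetscInt nnz[3] = {2, 3, 2};    // expected nonzero blocks per block row (illustrative)

     PetscCall(MatCreate(PETSC_COMM_SELF, &A));
     PetscCall(MatSetSizes(A, bs * mbs, bs * mbs, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATBLOCKMAT));
     PetscCall(MatBlockMatSetPreallocation(A, bs, 0, nnz));
*/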

static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
  if (PetscLikely(nnz)) {
    nz = 0;
    for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
  PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
  bmat->free_a  = PETSC_TRUE;
  bmat->free_ij = PETSC_TRUE;

  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
  consisting of (usually) sparse blocks.

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data         = (void *)b;
  A->ops[0]       = MatOps_Values;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of rows
. n    - number of columns
. bs   - size of each submatrix
. nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
- nnz  - expected number of nonzero blocks per block row if known (use `NULL` otherwise)

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  Matrices of this type are nominally sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
  have the same size and be sequential. The local and global sizes must be compatible with this decomposition.

  For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

  Developer Notes:
  I don't like the name, it is not `MATNESTMAT`

.seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
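
/*
   Usage sketch (the file name and block size are illustrative only): a MATBLOCKMAT can
   be built through MatLoad(), which dispatches to MatLoad_BlockMat() above. The block
   size is taken from the -matload_block_size option and -matload_symmetric selects the
   symmetric (upper triangular) block storage.

     Mat         A;
     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_SELF, &A));
     PetscCall(MatSetType(A, MATBLOCKMAT));
     PetscCall(MatLoad(A, viewer));    // run with -matload_block_size <bs>
     PetscCall(PetscViewerDestroy(&viewer));
     // ... use A with MatMult(), MatSOR(), etc. ...
     PetscCall(MatDestroy(&A));
*/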