Actual source code: blockmat.c
1: /*
2: This provides a matrix type (MATBLOCKMAT) whose nonzero entries are themselves Mat objects
3: */
5: #include <petsc/private/matimpl.h>
6: #include <../src/mat/impls/baij/seq/baij.h>
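/*
   The struct below reuses the SEQAIJHEADER/SEQBAIJHEADER macros, so it carries the usual CSR
   bookkeeping (the arrays i, j, ilen, imax, the value array a, nz, mbs, ...), except that the stored
   values are Mat pointers rather than scalars. diags caches the factored diagonal blocks that the
   MatSOR() routines build on first use.
*/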
8: typedef struct {
9: SEQAIJHEADER(Mat);
10: SEQBAIJHEADER;
11: Mat *diags;
13: Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
14: } Mat_BlockMat;
16: MatGetDiagonalMarkers(BlockMat, A->rmap->bs)
18: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
19: {
20: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
21: PetscScalar *x;
22: const Mat *v;
23: const PetscScalar *b;
24: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
25: const PetscInt *idx;
26: IS row, col;
27: MatFactorInfo info;
28: Vec left = a->left, right = a->right, middle = a->middle;
29: Mat *diag;
31: PetscFunctionBegin;
32: its = its * lits;
33: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
34: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
35: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
36: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
37: PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
39: if (!a->diags) {
40: const PetscInt *adiag;
42: PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
43: PetscCall(PetscMalloc1(mbs, &a->diags));
44: PetscCall(MatFactorInfoInitialize(&info));
45: for (i = 0; i < mbs; i++) {
46: PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
47: PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[adiag[i]], row, &info));
48: PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
49: PetscCall(ISDestroy(&row));
50: PetscCall(ISDestroy(&col));
51: }
52: PetscCall(VecDuplicate(bb, &a->workb));
53: }
54: diag = a->diags;
56: PetscCall(VecSet(xx, 0.0));
57: PetscCall(VecGetArray(xx, &x));
58: /* copy right-hand side because it must be modified during iteration */
59: PetscCall(VecCopy(bb, a->workb));
60: PetscCall(VecGetArrayRead(a->workb, &b));
62: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
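  /*
     Sketch of each sweep below (block Gauss-Seidel on the upper-triangular storage): for block row i
     form t = b_i - sum_{j>i} A_{ij} x_j over the stored off-diagonal blocks, solve A_{ii} x_i = t with
     the Cholesky factors computed above, and then update the copied right-hand side of the later rows,
     b_j -= A_{ij}^T x_i, so the unstored lower-triangular contributions are accounted for.
  */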
63: while (its--) {
64: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
65: for (i = 0; i < mbs; i++) {
66: n = a->i[i + 1] - a->i[i] - 1;
67: idx = a->j + a->i[i] + 1;
68: v = a->a + a->i[i] + 1;
70: PetscCall(VecSet(left, 0.0));
71: for (j = 0; j < n; j++) {
72: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
73: PetscCall(MatMultAdd(v[j], right, left, left));
74: PetscCall(VecResetArray(right));
75: }
76: PetscCall(VecPlaceArray(right, b + i * bs));
77: PetscCall(VecAYPX(left, -1.0, right));
78: PetscCall(VecResetArray(right));
80: PetscCall(VecPlaceArray(right, x + i * bs));
81: PetscCall(MatSolve(diag[i], left, right));
83: /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
84: for (j = 0; j < n; j++) {
85: PetscCall(MatMultTranspose(v[j], right, left));
86: PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
87: PetscCall(VecAXPY(middle, -1.0, left));
88: PetscCall(VecResetArray(middle));
89: }
90: PetscCall(VecResetArray(right));
91: }
92: }
93: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
94: for (i = mbs - 1; i >= 0; i--) {
95: n = a->i[i + 1] - a->i[i] - 1;
96: idx = a->j + a->i[i] + 1;
97: v = a->a + a->i[i] + 1;
99: PetscCall(VecSet(left, 0.0));
100: for (j = 0; j < n; j++) {
101: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
102: PetscCall(MatMultAdd(v[j], right, left, left));
103: PetscCall(VecResetArray(right));
104: }
105: PetscCall(VecPlaceArray(right, b + i * bs));
106: PetscCall(VecAYPX(left, -1.0, right));
107: PetscCall(VecResetArray(right));
109: PetscCall(VecPlaceArray(right, x + i * bs));
110: PetscCall(MatSolve(diag[i], left, right));
111: PetscCall(VecResetArray(right));
112: }
113: }
114: }
115: PetscCall(VecRestoreArray(xx, &x));
116: PetscCall(VecRestoreArrayRead(a->workb, &b));
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
121: {
122: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
123: PetscScalar *x;
124: const Mat *v;
125: const PetscScalar *b;
126: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
127: const PetscInt *idx;
128: IS row, col;
129: MatFactorInfo info;
130: Vec left = a->left, right = a->right;
131: Mat *diag;
133: PetscFunctionBegin;
134: its = its * lits;
135: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
136: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
137: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
138: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
140: if (!a->diags) {
141: const PetscInt *adiag;
143: PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
144: PetscCall(PetscMalloc1(mbs, &a->diags));
145: PetscCall(MatFactorInfoInitialize(&info));
146: for (i = 0; i < mbs; i++) {
147: PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
148: PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[adiag[i]], row, col, &info));
149: PetscCall(MatLUFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
150: PetscCall(ISDestroy(&row));
151: PetscCall(ISDestroy(&col));
152: }
153: }
154: diag = a->diags;
156: PetscCall(VecSet(xx, 0.0));
157: PetscCall(VecGetArray(xx, &x));
158: PetscCall(VecGetArrayRead(bb, &b));
160: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
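  /*
     Sketch of each sweep below (block Gauss-Seidel): for block row i accumulate
     t = b_i - sum_{j != i} A_{ij} x_j over the stored off-diagonal blocks and solve
     A_{ii} x_i = t using the LU factors of the diagonal block computed above.
  */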
161: while (its--) {
162: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
163: for (i = 0; i < mbs; i++) {
164: n = a->i[i + 1] - a->i[i];
165: idx = a->j + a->i[i];
166: v = a->a + a->i[i];
168: PetscCall(VecSet(left, 0.0));
169: for (j = 0; j < n; j++) {
170: if (idx[j] != i) {
171: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
172: PetscCall(MatMultAdd(v[j], right, left, left));
173: PetscCall(VecResetArray(right));
174: }
175: }
176: PetscCall(VecPlaceArray(right, b + i * bs));
177: PetscCall(VecAYPX(left, -1.0, right));
178: PetscCall(VecResetArray(right));
180: PetscCall(VecPlaceArray(right, x + i * bs));
181: PetscCall(MatSolve(diag[i], left, right));
182: PetscCall(VecResetArray(right));
183: }
184: }
185: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186: for (i = mbs - 1; i >= 0; i--) {
187: n = a->i[i + 1] - a->i[i];
188: idx = a->j + a->i[i];
189: v = a->a + a->i[i];
191: PetscCall(VecSet(left, 0.0));
192: for (j = 0; j < n; j++) {
193: if (idx[j] != i) {
194: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
195: PetscCall(MatMultAdd(v[j], right, left, left));
196: PetscCall(VecResetArray(right));
197: }
198: }
199: PetscCall(VecPlaceArray(right, b + i * bs));
200: PetscCall(VecAYPX(left, -1.0, right));
201: PetscCall(VecResetArray(right));
203: PetscCall(VecPlaceArray(right, x + i * bs));
204: PetscCall(MatSolve(diag[i], left, right));
205: PetscCall(VecResetArray(right));
206: }
207: }
208: }
209: PetscCall(VecRestoreArray(xx, &x));
210: PetscCall(VecRestoreArrayRead(bb, &b));
211: PetscFunctionReturn(PETSC_SUCCESS);
212: }
214: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
215: {
216: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
217: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
218: PetscInt *ai = a->i, *ailen = a->ilen;
219: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
220: PetscInt ridx, cidx;
221: PetscBool roworiented = a->roworiented;
222: MatScalar value;
223: Mat *ap, *aa = a->a;
225: PetscFunctionBegin;
226: for (k = 0; k < m; k++) { /* loop over added rows */
227: row = im[k];
228: brow = row / bs;
229: if (row < 0) continue;
230: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
231: rp = aj + ai[brow];
232: ap = aa + ai[brow];
233: nrow = ailen[brow];
234: low = 0;
235: high = nrow;
236: for (l = 0; l < n; l++) { /* loop over added columns */
237: if (in[l] < 0) continue;
238: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
239: col = in[l];
240: bcol = col / bs;
241: if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
242: ridx = row % bs;
243: cidx = col % bs;
244: if (roworiented) value = v[l + k * n];
245: else value = v[k + l * m];
247: if (col <= lastcol) low = 0;
248: else high = nrow;
249: lastcol = col;
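      /* binary search, then a short linear scan, to locate block column bcol within this block row */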
250: while (high - low > 7) {
251: t = (low + high) / 2;
252: if (rp[t] > bcol) high = t;
253: else low = t;
254: }
255: for (i = low; i < high; i++) {
256: if (rp[i] > bcol) break;
257: if (rp[i] == bcol) goto noinsert1;
258: }
259: if (nonew == 1) goto noinsert1;
260: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
261: #if 0
262: MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
263: N = nrow++ - 1;
264: high++;
265: /* shift up all the later entries in this row */
266: for (ii = N; ii >= i; ii--) {
267: rp[ii + 1] = rp[ii];
268: ap[ii + 1] = ap[ii];
269: }
270: if (N >= i) ap[i] = NULL;
271: rp[i] = bcol;
272: a->nz++;
273: #endif
274: noinsert1:;
275: if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
276: PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
277: low = i;
278: }
279: ailen[brow] = nrow;
280: }
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
285: {
286: Mat tmpA;
287: PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
288: const PetscInt *cols;
289: const PetscScalar *values;
290: PetscBool flg = PETSC_FALSE, notdone;
291: Mat_SeqAIJ *a;
292: Mat_BlockMat *amat;
294: PetscFunctionBegin;
295: /* force binary viewer to load .info file if it has not yet done so */
296: PetscCall(PetscViewerSetUp(viewer));
297: PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
298: PetscCall(MatSetType(tmpA, MATSEQAIJ));
299: PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
301: PetscCall(MatGetLocalSize(tmpA, &m, &n));
302: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix", "Mat");
303: PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
304: PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
305: PetscOptionsEnd();
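  /*
     Example (a sketch; the executable name is a placeholder): if the calling program creates the matrix,
     calls MatSetFromOptions() and then MatLoad(), the options above can be given on the command line, e.g.
         ./ex1 -mat_type blockmat -matload_block_size 4 -matload_symmetric
     to read a MATSEQAIJ binary file into a MATBLOCKMAT with 4x4 blocks, keeping only the upper triangle.
  */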
307: /* Determine number of nonzero blocks for each block row */
308: a = (Mat_SeqAIJ *)tmpA->data;
309: mbs = m / bs;
310: PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
311: PetscCall(PetscArrayzero(lens, mbs));
313: for (i = 0; i < mbs; i++) {
314: for (j = 0; j < bs; j++) {
315: ii[j] = a->j + a->i[i * bs + j];
316: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
317: }
319: currentcol = -1;
320: while (PETSC_TRUE) {
321: notdone = PETSC_FALSE;
322: nextcol = 1000000000;
323: for (j = 0; j < bs; j++) {
324: while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
325: ii[j]++;
326: ilens[j]--;
327: }
328: if (ilens[j] > 0) {
329: notdone = PETSC_TRUE;
330: nextcol = PetscMin(nextcol, ii[j][0] / bs);
331: }
332: }
333: if (!notdone) break;
334: if (!flg || (nextcol >= i)) lens[i]++;
335: currentcol = nextcol;
336: }
337: }
339: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
340: PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
341: if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
342: amat = (Mat_BlockMat *)newmat->data;
344: /* preallocate the submatrices */
345: PetscCall(PetscMalloc1(bs, &llens));
346: for (i = 0; i < mbs; i++) { /* loops for block rows */
347: for (j = 0; j < bs; j++) {
348: ii[j] = a->j + a->i[i * bs + j];
349: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
350: }
352: currentcol = 1000000000;
353: for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
354: if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
355: }
357: while (PETSC_TRUE) { /* loops over blocks in block row */
358: notdone = PETSC_FALSE;
359: nextcol = 1000000000;
360: PetscCall(PetscArrayzero(llens, bs));
361: for (j = 0; j < bs; j++) { /* loop over rows in block */
362: while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
363: ii[j]++;
364: ilens[j]--;
365: llens[j]++;
366: }
367: if (ilens[j] > 0) {
368: notdone = PETSC_TRUE;
369: nextcol = PetscMin(nextcol, ii[j][0] / bs);
370: }
371: }
372: PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
373: if (!flg || currentcol >= i) {
374: amat->j[cnt] = currentcol;
375: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
376: }
378: if (!notdone) break;
379: currentcol = nextcol;
380: }
381: amat->ilen[i] = lens[i];
382: }
384: PetscCall(PetscFree3(lens, ii, ilens));
385: PetscCall(PetscFree(llens));
387: /* copy over the matrix, one row at a time */
388: for (i = 0; i < m; i++) {
389: PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
390: PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
391: PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
392: }
393: PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
394: PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
399: {
400: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
401: const char *name;
402: PetscViewerFormat format;
404: PetscFunctionBegin;
405: PetscCall(PetscObjectGetName((PetscObject)A, &name));
406: PetscCall(PetscViewerGetFormat(viewer, &format));
407: if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
408: PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
409: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
410: }
411: PetscFunctionReturn(PETSC_SUCCESS);
412: }
414: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
415: {
416: Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
417: PetscInt i;
419: PetscFunctionBegin;
420: PetscCall(VecDestroy(&bmat->right));
421: PetscCall(VecDestroy(&bmat->left));
422: PetscCall(VecDestroy(&bmat->middle));
423: PetscCall(VecDestroy(&bmat->workb));
424: if (bmat->diags) {
425: for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
426: }
427: if (bmat->a) {
428: for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
429: }
430: PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
431: PetscCall(PetscFree(mat->data));
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }
435: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
436: {
437: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
438: PetscScalar *xx, *yy;
439: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
440: Mat *aa;
442: PetscFunctionBegin;
443: /*
444: Standard CSR multiply except each entry is a Mat
445: */
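  /*
     Storage sketch: the usual CSR arrays are used, except that the value array holds Mat pointers.
     For block row i the nonzero blocks are aa[ii[i]] .. aa[ii[i+1]-1] and aj[k] is the block column
     of aa[k]. For example, two block rows with nonzero blocks A00, A01, A11 would be stored as
         ii = {0, 2, 3},   aj = {0, 1, 1},   aa = {A00, A01, A11}
  */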
446: PetscCall(VecGetArray(x, &xx));
448: PetscCall(VecSet(y, 0.0));
449: PetscCall(VecGetArray(y, &yy));
450: aj = bmat->j;
451: aa = bmat->a;
452: ii = bmat->i;
453: for (i = 0; i < m; i++) {
454: jrow = ii[i];
455: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
456: n = ii[i + 1] - jrow;
457: for (j = 0; j < n; j++) {
458: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
459: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
460: PetscCall(VecResetArray(bmat->right));
461: jrow++;
462: }
463: PetscCall(VecResetArray(bmat->left));
464: }
465: PetscCall(VecRestoreArray(x, &xx));
466: PetscCall(VecRestoreArray(y, &yy));
467: PetscFunctionReturn(PETSC_SUCCESS);
468: }
470: static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
471: {
472: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
473: PetscScalar *xx, *yy;
474: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
475: Mat *aa;
477: PetscFunctionBegin;
478: /*
479: Standard CSR multiply except each entry is a Mat
480: */
481: PetscCall(VecGetArray(x, &xx));
483: PetscCall(VecSet(y, 0.0));
484: PetscCall(VecGetArray(y, &yy));
485: aj = bmat->j;
486: aa = bmat->a;
487: ii = bmat->i;
488: for (i = 0; i < m; i++) {
489: jrow = ii[i];
490: n = ii[i + 1] - jrow;
491: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
492: PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
493: /* if we ALWAYS required a diagonal entry then this test could be removed */
494: if (aj[jrow] == i) {
495: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
496: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497: PetscCall(VecResetArray(bmat->right));
498: jrow++;
499: n--;
500: }
501: for (j = 0; j < n; j++) {
502: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
503: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
504: PetscCall(VecResetArray(bmat->right));
506: PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
507: PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
508: PetscCall(VecResetArray(bmat->right));
509: jrow++;
510: }
511: PetscCall(VecResetArray(bmat->left));
512: PetscCall(VecResetArray(bmat->middle));
513: }
514: PetscCall(VecRestoreArray(x, &xx));
515: PetscCall(VecRestoreArray(y, &yy));
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
519: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
520: {
521: PetscFunctionBegin;
522: PetscFunctionReturn(PETSC_SUCCESS);
523: }
525: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
526: {
527: PetscFunctionBegin;
528: PetscFunctionReturn(PETSC_SUCCESS);
529: }
531: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
532: {
533: PetscFunctionBegin;
534: PetscFunctionReturn(PETSC_SUCCESS);
535: }
537: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
538: {
539: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
540: Mat_SeqAIJ *c;
541: PetscInt i, k, first, step, lensi, nrows, ncols;
542: PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
543: PetscScalar *a_new;
544: Mat C, *aa = a->a;
545: PetscBool stride, equal;
547: PetscFunctionBegin;
548: PetscCall(ISEqual(isrow, iscol, &equal));
549: PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
550: PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
551: PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
552: PetscCall(ISStrideGetInfo(iscol, &first, &step));
553: PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
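  /*
     Hence isrow/iscol must be strided with step bs, selecting the same entry 0 <= first < bs from every
     block; for example (illustrative) ISCreateStride(PETSC_COMM_SELF, n, first, bs, &is). The submatrix
     below is then built from entry (first, first) of each nonzero block.
  */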
555: PetscCall(ISGetLocalSize(isrow, &nrows));
556: ncols = nrows;
558: /* create submatrix */
559: if (scall == MAT_REUSE_MATRIX) {
560: PetscInt n_cols, n_rows;
561: C = *B;
562: PetscCall(MatGetSize(C, &n_rows, &n_cols));
563: PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
564: PetscCall(MatZeroEntries(C));
565: } else {
566: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
567: PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
568: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
569: else PetscCall(MatSetType(C, MATSEQAIJ));
570: PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
571: PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
572: }
573: c = (Mat_SeqAIJ *)C->data;
575: /* loop over rows inserting into submatrix */
576: a_new = c->a;
577: j_new = c->j;
578: i_new = c->i;
580: for (i = 0; i < nrows; i++) {
581: lensi = ailen[i];
582: for (k = 0; k < lensi; k++) {
583: *j_new++ = *aj++;
584: PetscCall(MatGetValue(*aa++, first, first, a_new++));
585: }
586: i_new[i + 1] = i_new[i] + lensi;
587: c->ilen[i] = lensi;
588: }
590: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
591: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
592: *B = C;
593: PetscFunctionReturn(PETSC_SUCCESS);
594: }
596: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
597: {
598: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
599: PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
600: PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
601: Mat *aa = a->a, *ap;
603: PetscFunctionBegin;
604: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
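  /* squeeze out unused preallocated slots so each block row i occupies aj[ai[i]] .. aj[ai[i+1]-1] contiguously */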
606: if (m) rmax = ailen[0]; /* determine row with most nonzeros */
607: for (i = 1; i < m; i++) {
608: /* move each row back by the number of empty slots (fshift) before it */
609: fshift += imax[i - 1] - ailen[i - 1];
610: rmax = PetscMax(rmax, ailen[i]);
611: if (fshift) {
612: ip = aj + ai[i];
613: ap = aa + ai[i];
614: N = ailen[i];
615: for (j = 0; j < N; j++) {
616: ip[j - fshift] = ip[j];
617: ap[j - fshift] = ap[j];
618: }
619: }
620: ai[i] = ai[i - 1] + ailen[i - 1];
621: }
622: if (m) {
623: fshift += imax[m - 1] - ailen[m - 1];
624: ai[m] = ai[m - 1] + ailen[m - 1];
625: }
626: /* reset ilen and imax for each row */
627: for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
628: a->nz = ai[m];
629: for (i = 0; i < a->nz; i++) {
630: PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
631: PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
632: PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
633: }
634: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
635: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
636: PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
638: A->info.mallocs += a->reallocs;
639: a->reallocs = 0;
640: A->info.nz_unneeded = (double)fshift;
641: a->rmax = rmax;
642: PetscFunctionReturn(PETSC_SUCCESS);
643: }
645: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
646: {
647: PetscFunctionBegin;
648: if (opt == MAT_SYMMETRIC && flg) {
649: A->ops->sor = MatSOR_BlockMat_Symmetric;
650: A->ops->mult = MatMult_BlockMat_Symmetric;
651: } else {
652: PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
653: }
654: PetscFunctionReturn(PETSC_SUCCESS);
655: }
657: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
658: NULL,
659: NULL,
660: MatMult_BlockMat,
661: /* 4*/ MatMultAdd_BlockMat,
662: MatMultTranspose_BlockMat,
663: MatMultTransposeAdd_BlockMat,
664: NULL,
665: NULL,
666: NULL,
667: /* 10*/ NULL,
668: NULL,
669: NULL,
670: MatSOR_BlockMat,
671: NULL,
672: /* 15*/ NULL,
673: NULL,
674: NULL,
675: NULL,
676: NULL,
677: /* 20*/ NULL,
678: MatAssemblyEnd_BlockMat,
679: MatSetOption_BlockMat,
680: NULL,
681: /* 24*/ NULL,
682: NULL,
683: NULL,
684: NULL,
685: NULL,
686: /* 29*/ NULL,
687: NULL,
688: NULL,
689: NULL,
690: NULL,
691: /* 34*/ NULL,
692: NULL,
693: NULL,
694: NULL,
695: NULL,
696: /* 39*/ NULL,
697: NULL,
698: NULL,
699: NULL,
700: NULL,
701: /* 44*/ NULL,
702: NULL,
703: MatShift_Basic,
704: NULL,
705: NULL,
706: /* 49*/ NULL,
707: NULL,
708: NULL,
709: NULL,
710: NULL,
711: /* 54*/ NULL,
712: NULL,
713: NULL,
714: NULL,
715: NULL,
716: /* 59*/ MatCreateSubMatrix_BlockMat,
717: MatDestroy_BlockMat,
718: MatView_BlockMat,
719: NULL,
720: NULL,
721: /* 64*/ NULL,
722: NULL,
723: NULL,
724: NULL,
725: NULL,
726: /* 69*/ NULL,
727: NULL,
728: NULL,
729: NULL,
730: NULL,
731: /* 74*/ NULL,
732: NULL,
733: NULL,
734: NULL,
735: MatLoad_BlockMat,
736: /* 79*/ NULL,
737: NULL,
738: NULL,
739: NULL,
740: NULL,
741: /* 84*/ NULL,
742: NULL,
743: NULL,
744: NULL,
745: NULL,
746: /* 89*/ NULL,
747: NULL,
748: NULL,
749: NULL,
750: NULL,
751: /* 94*/ NULL,
752: NULL,
753: NULL,
754: NULL,
755: NULL,
756: /* 99*/ NULL,
757: NULL,
758: NULL,
759: NULL,
760: NULL,
761: /*104*/ NULL,
762: NULL,
763: NULL,
764: NULL,
765: NULL,
766: /*109*/ NULL,
767: NULL,
768: NULL,
769: NULL,
770: NULL,
771: /*114*/ NULL,
772: NULL,
773: NULL,
774: NULL,
775: NULL,
776: /*119*/ NULL,
777: NULL,
778: NULL,
779: NULL,
780: NULL,
781: /*124*/ NULL,
782: NULL,
783: NULL,
784: NULL,
785: NULL,
786: /*129*/ NULL,
787: NULL,
788: NULL,
789: NULL,
790: NULL,
791: /*134*/ NULL,
792: NULL,
793: NULL,
794: NULL,
795: NULL,
796: /*139*/ NULL,
797: NULL,
798: NULL,
799: NULL,
800: NULL};
802: /*@C
803: MatBlockMatSetPreallocation - For good matrix assembly performance
804: the user should preallocate the matrix storage by setting the parameter `nz`
805: (or the array `nnz`). Setting these parameters accurately can increase
806: performance during matrix assembly by more than a factor of 50.
808: Collective
810: Input Parameters:
811: + B - The matrix
812: . bs - size of each block in matrix
813: . nz - number of nonzero blocks per block row (same for all block rows)
814: - nnz - array containing the number of nonzero blocks in the various block rows
815: (possibly different for each block row) or `NULL`
817: Level: intermediate
819: Notes:
820: If `nnz` is given then `nz` is ignored.
822: The current implementation requires block-row-by-block-row preallocation, that is, `nnz` must be
823: provided; passing `nnz` = `NULL` generates an error, so `nz` alone cannot yet be used to let
824: PETSc control dynamic memory allocation.
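   Example usage (a minimal sketch; the sizes, block size, and per-block-row block counts are illustrative):
.vb
   Mat      A;
   PetscInt nnz[2] = {2, 1}; /* expected number of nonzero blocks in each of the two block rows */

   MatCreate(PETSC_COMM_SELF, &A);
   MatSetSizes(A, 8, 8, 8, 8);
   MatSetType(A, MATBLOCKMAT);
   MatBlockMatSetPreallocation(A, 4, 0, nnz); /* 4x4 blocks, hence 2 block rows */
.ve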
826: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
827: @*/
828: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
829: {
830: PetscFunctionBegin;
831: PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
832: PetscFunctionReturn(PETSC_SUCCESS);
833: }
835: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
836: {
837: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
838: PetscInt i;
840: PetscFunctionBegin;
841: PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
842: PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
843: PetscCall(PetscLayoutSetUp(A->rmap));
844: PetscCall(PetscLayoutSetUp(A->cmap));
845: PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
847: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
848: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
849: if (nnz) {
850: for (i = 0; i < A->rmap->n / bs; i++) {
851: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
852: PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
853: }
854: }
855: bmat->mbs = A->rmap->n / bs;
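  /* dummy work vectors: right and middle are created without storage so that VecPlaceArray() can point
     them at pieces of the user's vectors inside MatMult() and MatSOR() */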
857: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
858: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
859: PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
861: if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
862: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
863: nz = 0;
864: for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
865: bmat->imax[i] = nnz[i];
866: nz += nnz[i];
867: }
869: /* bmat->ilen will count nonzeros in each row so far. */
870: PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
872: /* allocate the matrix space */
873: PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
874: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
875: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
876: PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
877: bmat->free_a = PETSC_TRUE;
878: bmat->free_ij = PETSC_TRUE;
880: bmat->i[0] = 0;
881: for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
883: bmat->nz = 0;
884: bmat->maxnz = nz;
885: A->info.nz_unneeded = (double)bmat->maxnz;
886: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
887: PetscFunctionReturn(PETSC_SUCCESS);
888: }
890: /*MC
891: MATBLOCKMAT - A matrix defined by a set of `Mat` objects, representing a sparse block matrix
892: whose blocks are themselves (usually) sparse.
894: Level: advanced
896: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
897: M*/
899: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
900: {
901: Mat_BlockMat *b;
903: PetscFunctionBegin;
904: PetscCall(PetscNew(&b));
905: A->data = (void *)b;
906: A->ops[0] = MatOps_Values;
907: A->assembled = PETSC_TRUE;
908: A->preallocated = PETSC_FALSE;
909: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
911: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
912: PetscFunctionReturn(PETSC_SUCCESS);
913: }
915: /*@C
916: MatCreateBlockMat - Creates a new matrix in which each nonzero block is a uniform-size sequential `Mat` object
918: Collective
920: Input Parameters:
921: + comm - MPI communicator
922: . m - number of rows
923: . n - number of columns
924: . bs - size of each submatrix
925: . nz - expected maximum number of nonzero blocks per block row (use `PETSC_DEFAULT` if not known)
926: - nnz - expected number of nonzero blocks per block row if known (use `NULL` otherwise)
928: Output Parameter:
929: . A - the matrix
931: Level: intermediate
933: Notes:
934: Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
935: have the same size and be sequential. The local and global sizes must be compatible with this decomposition.
937: For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
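   Example usage (a minimal sketch; the sizes, block size, and per-block-row block counts are illustrative):
.vb
   Mat      A;
   PetscInt nnz[2] = {2, 1};

   MatCreateBlockMat(PETSC_COMM_SELF, 8, 8, 4, 0, nnz, &A); /* an 8x8 matrix stored as 4x4 blocks */
.ve
   Note that `MatSetValues()` on this type can only set entries inside blocks that already exist in the
   nonzero block structure (as created, for example, by `MatLoad()`); inserting a new nonzero block
   generates an error.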
939: Developer Notes:
940: I don't like the name, it is not `MATNESTMAT`
942: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
943: @*/
944: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
945: {
946: PetscFunctionBegin;
947: PetscCall(MatCreate(comm, A));
948: PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
949: PetscCall(MatSetType(*A, MATBLOCKMAT));
950: PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
951: PetscFunctionReturn(PETSC_SUCCESS);
952: }