Actual source code: blockmat.c

  1: /*
  2:    This provides a matrix type (MATBLOCKMAT) whose nonzero entries are themselves sequential Mat objects
  3: */

  5: #include <petsc/private/matimpl.h>
  6: #include <../src/mat/impls/baij/seq/baij.h>

  8: typedef struct {
  9:   SEQAIJHEADER(Mat);
 10:   SEQBAIJHEADER;
 11:   Mat *diags;

 13:   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
 14: } Mat_BlockMat;
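
/*
   The SEQAIJHEADER(Mat)/SEQBAIJHEADER members give this type a standard CSR layout
   (row starts i[], block-column indices j[], per-row counts ilen[]/imax[]) whose
   "values" array a[] holds one sequential Mat per nonzero block; diags[] caches
   factored copies of the diagonal blocks for the SOR routines.  See
   MatMult_BlockMat() for the CSR-style traversal.
*/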

 16: MatGetDiagonalMarkers(BlockMat, A->rmap->bs)

 18: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
 19: {
 20:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
 21:   PetscScalar       *x;
 22:   const Mat         *v;
 23:   const PetscScalar *b;
 24:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
 25:   const PetscInt    *idx;
 26:   IS                 row, col;
 27:   MatFactorInfo      info;
 28:   Vec                left = a->left, right = a->right, middle = a->middle;
 29:   Mat               *diag;

 31:   PetscFunctionBegin;
 32:   its = its * lits;
 33:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
 34:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
 35:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
 36:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
 37:   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

 39:   if (!a->diags) {
 40:     const PetscInt *adiag;

 42:     PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
 43:     PetscCall(PetscMalloc1(mbs, &a->diags));
 44:     PetscCall(MatFactorInfoInitialize(&info));
 45:     for (i = 0; i < mbs; i++) {
  46:       PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
 47:       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[adiag[i]], row, &info));
 48:       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
 49:       PetscCall(ISDestroy(&row));
 50:       PetscCall(ISDestroy(&col));
 51:     }
 52:     PetscCall(VecDuplicate(bb, &a->workb));
 53:   }
 54:   diag = a->diags;

 56:   PetscCall(VecSet(xx, 0.0));
 57:   PetscCall(VecGetArray(xx, &x));
 58:   /* copy right-hand side because it must be modified during iteration */
 59:   PetscCall(VecCopy(bb, a->workb));
 60:   PetscCall(VecGetArrayRead(a->workb, &b));

 62:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 63:   while (its--) {
 64:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
 65:       for (i = 0; i < mbs; i++) {
 66:         n   = a->i[i + 1] - a->i[i] - 1;
 67:         idx = a->j + a->i[i] + 1;
 68:         v   = a->a + a->i[i] + 1;

 70:         PetscCall(VecSet(left, 0.0));
 71:         for (j = 0; j < n; j++) {
 72:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
 73:           PetscCall(MatMultAdd(v[j], right, left, left));
 74:           PetscCall(VecResetArray(right));
 75:         }
 76:         PetscCall(VecPlaceArray(right, b + i * bs));
 77:         PetscCall(VecAYPX(left, -1.0, right));
 78:         PetscCall(VecResetArray(right));

 80:         PetscCall(VecPlaceArray(right, x + i * bs));
 81:         PetscCall(MatSolve(diag[i], left, right));

 83:         /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
 84:         for (j = 0; j < n; j++) {
 85:           PetscCall(MatMultTranspose(v[j], right, left));
 86:           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
 87:           PetscCall(VecAXPY(middle, -1.0, left));
 88:           PetscCall(VecResetArray(middle));
 89:         }
 90:         PetscCall(VecResetArray(right));
 91:       }
 92:     }
 93:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
 94:       for (i = mbs - 1; i >= 0; i--) {
 95:         n   = a->i[i + 1] - a->i[i] - 1;
 96:         idx = a->j + a->i[i] + 1;
 97:         v   = a->a + a->i[i] + 1;

 99:         PetscCall(VecSet(left, 0.0));
100:         for (j = 0; j < n; j++) {
101:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
102:           PetscCall(MatMultAdd(v[j], right, left, left));
103:           PetscCall(VecResetArray(right));
104:         }
105:         PetscCall(VecPlaceArray(right, b + i * bs));
106:         PetscCall(VecAYPX(left, -1.0, right));
107:         PetscCall(VecResetArray(right));

109:         PetscCall(VecPlaceArray(right, x + i * bs));
110:         PetscCall(MatSolve(diag[i], left, right));
111:         PetscCall(VecResetArray(right));
112:       }
113:     }
114:   }
115:   PetscCall(VecRestoreArray(xx, &x));
116:   PetscCall(VecRestoreArrayRead(a->workb, &b));
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }

120: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
121: {
122:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
123:   PetscScalar       *x;
124:   const Mat         *v;
125:   const PetscScalar *b;
126:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
127:   const PetscInt    *idx;
128:   IS                 row, col;
129:   MatFactorInfo      info;
130:   Vec                left = a->left, right = a->right;
131:   Mat               *diag;

133:   PetscFunctionBegin;
134:   its = its * lits;
135:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
136:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
137:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
138:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

140:   if (!a->diags) {
141:     const PetscInt *adiag;

143:     PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
144:     PetscCall(PetscMalloc1(mbs, &a->diags));
145:     PetscCall(MatFactorInfoInitialize(&info));
146:     for (i = 0; i < mbs; i++) {
147:       PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
148:       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[adiag[i]], row, col, &info));
149:       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
150:       PetscCall(ISDestroy(&row));
151:       PetscCall(ISDestroy(&col));
152:     }
153:   }
154:   diag = a->diags;

156:   PetscCall(VecSet(xx, 0.0));
157:   PetscCall(VecGetArray(xx, &x));
158:   PetscCall(VecGetArrayRead(bb, &b));

160:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
161:   while (its--) {
162:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
163:       for (i = 0; i < mbs; i++) {
164:         n   = a->i[i + 1] - a->i[i];
165:         idx = a->j + a->i[i];
166:         v   = a->a + a->i[i];

168:         PetscCall(VecSet(left, 0.0));
169:         for (j = 0; j < n; j++) {
170:           if (idx[j] != i) {
171:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
172:             PetscCall(MatMultAdd(v[j], right, left, left));
173:             PetscCall(VecResetArray(right));
174:           }
175:         }
176:         PetscCall(VecPlaceArray(right, b + i * bs));
177:         PetscCall(VecAYPX(left, -1.0, right));
178:         PetscCall(VecResetArray(right));

180:         PetscCall(VecPlaceArray(right, x + i * bs));
181:         PetscCall(MatSolve(diag[i], left, right));
182:         PetscCall(VecResetArray(right));
183:       }
184:     }
185:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186:       for (i = mbs - 1; i >= 0; i--) {
187:         n   = a->i[i + 1] - a->i[i];
188:         idx = a->j + a->i[i];
189:         v   = a->a + a->i[i];

191:         PetscCall(VecSet(left, 0.0));
192:         for (j = 0; j < n; j++) {
193:           if (idx[j] != i) {
194:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
195:             PetscCall(MatMultAdd(v[j], right, left, left));
196:             PetscCall(VecResetArray(right));
197:           }
198:         }
199:         PetscCall(VecPlaceArray(right, b + i * bs));
200:         PetscCall(VecAYPX(left, -1.0, right));
201:         PetscCall(VecResetArray(right));

203:         PetscCall(VecPlaceArray(right, x + i * bs));
204:         PetscCall(MatSolve(diag[i], left, right));
205:         PetscCall(VecResetArray(right));
206:       }
207:     }
208:   }
209:   PetscCall(VecRestoreArray(xx, &x));
210:   PetscCall(VecRestoreArrayRead(bb, &b));
211:   PetscFunctionReturn(PETSC_SUCCESS);
212: }

214: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
215: {
216:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
217:   PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
218:   PetscInt     *ai = a->i, *ailen = a->ilen;
219:   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
220:   PetscInt      ridx, cidx;
221:   PetscBool     roworiented = a->roworiented;
222:   MatScalar     value;
223:   Mat          *ap, *aa = a->a;

225:   PetscFunctionBegin;
226:   for (k = 0; k < m; k++) { /* loop over added rows */
227:     row  = im[k];
228:     brow = row / bs;
229:     if (row < 0) continue;
230:     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
231:     rp   = aj + ai[brow];
232:     ap   = aa + ai[brow];
233:     nrow = ailen[brow];
234:     low  = 0;
235:     high = nrow;
236:     for (l = 0; l < n; l++) { /* loop over added columns */
237:       if (in[l] < 0) continue;
238:       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
239:       col  = in[l];
240:       bcol = col / bs;
241:       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
242:       ridx = row % bs;
243:       cidx = col % bs;
244:       if (roworiented) value = v[l + k * n];
245:       else value = v[k + l * m];

247:       if (col <= lastcol) low = 0;
248:       else high = nrow;
249:       lastcol = col;
250:       while (high - low > 7) {
251:         t = (low + high) / 2;
252:         if (rp[t] > bcol) high = t;
253:         else low = t;
254:       }
255:       for (i = low; i < high; i++) {
256:         if (rp[i] > bcol) break;
257:         if (rp[i] == bcol) goto noinsert1;
258:       }
259:       if (nonew == 1) goto noinsert1;
260:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
261: #if 0
262:       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
263:       N = nrow++ - 1;
264:       high++;
265:       /* shift up all the later entries in this row */
266:       for (ii = N; ii >= i; ii--) {
267:         rp[ii + 1] = rp[ii];
268:         ap[ii + 1] = ap[ii];
269:       }
270:       if (N >= i) ap[i] = NULL;
271:       rp[i] = bcol;
272:       a->nz++;
273: #endif
274:     noinsert1:;
275:       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
276:       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
277:       low = i;
278:     }
279:     ailen[brow] = nrow;
280:   }
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
285: {
286:   Mat                tmpA;
287:   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
288:   const PetscInt    *cols;
289:   const PetscScalar *values;
290:   PetscBool          flg = PETSC_FALSE, notdone;
291:   Mat_SeqAIJ        *a;
292:   Mat_BlockMat      *amat;

294:   PetscFunctionBegin;
295:   /* force binary viewer to load .info file if it has not yet done so */
296:   PetscCall(PetscViewerSetUp(viewer));
297:   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
298:   PetscCall(MatSetType(tmpA, MATSEQAIJ));
299:   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));

301:   PetscCall(MatGetLocalSize(tmpA, &m, &n));
302:   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
303:   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
304:   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
305:   PetscOptionsEnd();

307:   /* Determine number of nonzero blocks for each block row */
308:   a   = (Mat_SeqAIJ *)tmpA->data;
309:   mbs = m / bs;
310:   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
311:   PetscCall(PetscArrayzero(lens, mbs));

313:   for (i = 0; i < mbs; i++) {
314:     for (j = 0; j < bs; j++) {
315:       ii[j]    = a->j + a->i[i * bs + j];
316:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
317:     }

319:     currentcol = -1;
320:     while (PETSC_TRUE) {
321:       notdone = PETSC_FALSE;
322:       nextcol = 1000000000;
323:       for (j = 0; j < bs; j++) {
324:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
325:           ii[j]++;
326:           ilens[j]--;
327:         }
328:         if (ilens[j] > 0) {
329:           notdone = PETSC_TRUE;
330:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
331:         }
332:       }
333:       if (!notdone) break;
334:       if (!flg || (nextcol >= i)) lens[i]++;
335:       currentcol = nextcol;
336:     }
337:   }

339:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
340:   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
341:   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
342:   amat = (Mat_BlockMat *)newmat->data;

344:   /* preallocate the submatrices */
345:   PetscCall(PetscMalloc1(bs, &llens));
346:   for (i = 0; i < mbs; i++) { /* loops for block rows */
347:     for (j = 0; j < bs; j++) {
348:       ii[j]    = a->j + a->i[i * bs + j];
349:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
350:     }

352:     currentcol = 1000000000;
353:     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
354:       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
355:     }

357:     while (PETSC_TRUE) { /* loops over blocks in block row */
358:       notdone = PETSC_FALSE;
359:       nextcol = 1000000000;
360:       PetscCall(PetscArrayzero(llens, bs));
361:       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
362:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
363:           ii[j]++;
364:           ilens[j]--;
365:           llens[j]++;
366:         }
367:         if (ilens[j] > 0) {
368:           notdone = PETSC_TRUE;
369:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
370:         }
371:       }
372:       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
373:       if (!flg || currentcol >= i) {
374:         amat->j[cnt] = currentcol;
375:         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
376:       }

378:       if (!notdone) break;
379:       currentcol = nextcol;
380:     }
381:     amat->ilen[i] = lens[i];
382:   }

384:   PetscCall(PetscFree3(lens, ii, ilens));
385:   PetscCall(PetscFree(llens));

387:   /* copy over the matrix, one row at a time */
388:   for (i = 0; i < m; i++) {
389:     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
390:     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
391:     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
392:   }
393:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
394:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
395:   PetscFunctionReturn(PETSC_SUCCESS);
396: }

398: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
399: {
400:   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
401:   const char       *name;
402:   PetscViewerFormat format;

404:   PetscFunctionBegin;
405:   PetscCall(PetscObjectGetName((PetscObject)A, &name));
406:   PetscCall(PetscViewerGetFormat(viewer, &format));
407:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
408:     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
409:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
410:   }
411:   PetscFunctionReturn(PETSC_SUCCESS);
412: }

414: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
415: {
416:   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
417:   PetscInt      i;

419:   PetscFunctionBegin;
420:   PetscCall(VecDestroy(&bmat->right));
421:   PetscCall(VecDestroy(&bmat->left));
422:   PetscCall(VecDestroy(&bmat->middle));
423:   PetscCall(VecDestroy(&bmat->workb));
424:   if (bmat->diags) {
425:     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
426:   }
427:   if (bmat->a) {
428:     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
429:   }
430:   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
431:   PetscCall(PetscFree(mat->data));
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }

435: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
436: {
437:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
438:   PetscScalar  *xx, *yy;
439:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
440:   Mat          *aa;

442:   PetscFunctionBegin;
443:   /*
444:      Standard CSR multiply except each entry is a Mat
445:   */
446:   PetscCall(VecGetArray(x, &xx));

448:   PetscCall(VecSet(y, 0.0));
449:   PetscCall(VecGetArray(y, &yy));
450:   aj = bmat->j;
451:   aa = bmat->a;
452:   ii = bmat->i;
453:   for (i = 0; i < m; i++) {
454:     jrow = ii[i];
455:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
456:     n = ii[i + 1] - jrow;
457:     for (j = 0; j < n; j++) {
458:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
459:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
460:       PetscCall(VecResetArray(bmat->right));
461:       jrow++;
462:     }
463:     PetscCall(VecResetArray(bmat->left));
464:   }
465:   PetscCall(VecRestoreArray(x, &xx));
466:   PetscCall(VecRestoreArray(y, &yy));
467:   PetscFunctionReturn(PETSC_SUCCESS);
468: }

470: static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
471: {
472:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
473:   PetscScalar  *xx, *yy;
474:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
475:   Mat          *aa;

477:   PetscFunctionBegin;
478:   /*
479:      Standard CSR multiply except each entry is a Mat
480:   */
481:   PetscCall(VecGetArray(x, &xx));

483:   PetscCall(VecSet(y, 0.0));
484:   PetscCall(VecGetArray(y, &yy));
485:   aj = bmat->j;
486:   aa = bmat->a;
487:   ii = bmat->i;
488:   for (i = 0; i < m; i++) {
489:     jrow = ii[i];
490:     n    = ii[i + 1] - jrow;
491:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
492:     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
493:     /* if we ALWAYS required a diagonal entry then could remove this if test */
494:     if (aj[jrow] == i) {
495:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
496:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497:       PetscCall(VecResetArray(bmat->right));
498:       jrow++;
499:       n--;
500:     }
501:     for (j = 0; j < n; j++) {
502:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
503:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
504:       PetscCall(VecResetArray(bmat->right));

506:       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
507:       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
508:       PetscCall(VecResetArray(bmat->right));
509:       jrow++;
510:     }
511:     PetscCall(VecResetArray(bmat->left));
512:     PetscCall(VecResetArray(bmat->middle));
513:   }
514:   PetscCall(VecRestoreArray(x, &xx));
515:   PetscCall(VecRestoreArray(y, &yy));
516:   PetscFunctionReturn(PETSC_SUCCESS);
517: }

519: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
520: {
521:   PetscFunctionBegin;
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
526: {
527:   PetscFunctionBegin;
528:   PetscFunctionReturn(PETSC_SUCCESS);
529: }

531: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
532: {
533:   PetscFunctionBegin;
534:   PetscFunctionReturn(PETSC_SUCCESS);
535: }

537: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
538: {
539:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
540:   Mat_SeqAIJ   *c;
541:   PetscInt      i, k, first, step, lensi, nrows, ncols;
542:   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
543:   PetscScalar  *a_new;
544:   Mat           C, *aa = a->a;
545:   PetscBool     stride, equal;

547:   PetscFunctionBegin;
548:   PetscCall(ISEqual(isrow, iscol, &equal));
549:   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
550:   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
551:   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
552:   PetscCall(ISStrideGetInfo(iscol, &first, &step));
553:   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

555:   PetscCall(ISGetLocalSize(isrow, &nrows));
556:   ncols = nrows;

558:   /* create submatrix */
559:   if (scall == MAT_REUSE_MATRIX) {
560:     PetscInt n_cols, n_rows;
561:     C = *B;
562:     PetscCall(MatGetSize(C, &n_rows, &n_cols));
563:     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
564:     PetscCall(MatZeroEntries(C));
565:   } else {
566:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
567:     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
568:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
569:     else PetscCall(MatSetType(C, MATSEQAIJ));
570:     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
571:     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
572:   }
573:   c = (Mat_SeqAIJ *)C->data;

575:   /* loop over rows inserting into submatrix */
576:   a_new = c->a;
577:   j_new = c->j;
578:   i_new = c->i;

580:   for (i = 0; i < nrows; i++) {
581:     lensi = ailen[i];
582:     for (k = 0; k < lensi; k++) {
583:       *j_new++ = *aj++;
584:       PetscCall(MatGetValue(*aa++, first, first, a_new++));
585:     }
586:     i_new[i + 1] = i_new[i] + lensi;
587:     c->ilen[i]   = lensi;
588:   }

590:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
591:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
592:   *B = C;
593:   PetscFunctionReturn(PETSC_SUCCESS);
594: }

596: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
597: {
598:   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
599:   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
600:   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
601:   Mat          *aa = a->a, *ap;

603:   PetscFunctionBegin;
604:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

606:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
607:   for (i = 1; i < m; i++) {
 608:     /* move each row back by the amount of empty slots (fshift) before it */
609:     fshift += imax[i - 1] - ailen[i - 1];
610:     rmax = PetscMax(rmax, ailen[i]);
611:     if (fshift) {
612:       ip = aj + ai[i];
613:       ap = aa + ai[i];
614:       N  = ailen[i];
615:       for (j = 0; j < N; j++) {
616:         ip[j - fshift] = ip[j];
617:         ap[j - fshift] = ap[j];
618:       }
619:     }
620:     ai[i] = ai[i - 1] + ailen[i - 1];
621:   }
622:   if (m) {
623:     fshift += imax[m - 1] - ailen[m - 1];
624:     ai[m] = ai[m - 1] + ailen[m - 1];
625:   }
626:   /* reset ilen and imax for each row */
627:   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
628:   a->nz = ai[m];
629:   for (i = 0; i < a->nz; i++) {
630:     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
631:     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
632:     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
633:   }
634:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
635:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
636:   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

638:   A->info.mallocs += a->reallocs;
639:   a->reallocs         = 0;
640:   A->info.nz_unneeded = (double)fshift;
641:   a->rmax             = rmax;
642:   PetscFunctionReturn(PETSC_SUCCESS);
643: }

645: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
646: {
647:   PetscFunctionBegin;
648:   if (opt == MAT_SYMMETRIC && flg) {
649:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
650:     A->ops->mult = MatMult_BlockMat_Symmetric;
651:   } else {
652:     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
653:   }
654:   PetscFunctionReturn(PETSC_SUCCESS);
655: }

657: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
658:                                        NULL,
659:                                        NULL,
660:                                        MatMult_BlockMat,
661:                                        /*  4*/ MatMultAdd_BlockMat,
662:                                        MatMultTranspose_BlockMat,
663:                                        MatMultTransposeAdd_BlockMat,
664:                                        NULL,
665:                                        NULL,
666:                                        NULL,
667:                                        /* 10*/ NULL,
668:                                        NULL,
669:                                        NULL,
670:                                        MatSOR_BlockMat,
671:                                        NULL,
672:                                        /* 15*/ NULL,
673:                                        NULL,
674:                                        NULL,
675:                                        NULL,
676:                                        NULL,
677:                                        /* 20*/ NULL,
678:                                        MatAssemblyEnd_BlockMat,
679:                                        MatSetOption_BlockMat,
680:                                        NULL,
681:                                        /* 24*/ NULL,
682:                                        NULL,
683:                                        NULL,
684:                                        NULL,
685:                                        NULL,
686:                                        /* 29*/ NULL,
687:                                        NULL,
688:                                        NULL,
689:                                        NULL,
690:                                        NULL,
691:                                        /* 34*/ NULL,
692:                                        NULL,
693:                                        NULL,
694:                                        NULL,
695:                                        NULL,
696:                                        /* 39*/ NULL,
697:                                        NULL,
698:                                        NULL,
699:                                        NULL,
700:                                        NULL,
701:                                        /* 44*/ NULL,
702:                                        NULL,
703:                                        MatShift_Basic,
704:                                        NULL,
705:                                        NULL,
706:                                        /* 49*/ NULL,
707:                                        NULL,
708:                                        NULL,
709:                                        NULL,
710:                                        NULL,
711:                                        /* 54*/ NULL,
712:                                        NULL,
713:                                        NULL,
714:                                        NULL,
715:                                        NULL,
716:                                        /* 59*/ MatCreateSubMatrix_BlockMat,
717:                                        MatDestroy_BlockMat,
718:                                        MatView_BlockMat,
719:                                        NULL,
720:                                        NULL,
721:                                        /* 64*/ NULL,
722:                                        NULL,
723:                                        NULL,
724:                                        NULL,
725:                                        NULL,
726:                                        /* 69*/ NULL,
727:                                        NULL,
728:                                        NULL,
729:                                        NULL,
730:                                        NULL,
731:                                        /* 74*/ NULL,
732:                                        NULL,
733:                                        NULL,
734:                                        NULL,
735:                                        MatLoad_BlockMat,
736:                                        /* 79*/ NULL,
737:                                        NULL,
738:                                        NULL,
739:                                        NULL,
740:                                        NULL,
741:                                        /* 84*/ NULL,
742:                                        NULL,
743:                                        NULL,
744:                                        NULL,
745:                                        NULL,
746:                                        /* 89*/ NULL,
747:                                        NULL,
748:                                        NULL,
749:                                        NULL,
750:                                        NULL,
751:                                        /* 94*/ NULL,
752:                                        NULL,
753:                                        NULL,
754:                                        NULL,
755:                                        NULL,
756:                                        /* 99*/ NULL,
757:                                        NULL,
758:                                        NULL,
759:                                        NULL,
760:                                        NULL,
761:                                        /*104*/ NULL,
762:                                        NULL,
763:                                        NULL,
764:                                        NULL,
765:                                        NULL,
766:                                        /*109*/ NULL,
767:                                        NULL,
768:                                        NULL,
769:                                        NULL,
770:                                        NULL,
771:                                        /*114*/ NULL,
772:                                        NULL,
773:                                        NULL,
774:                                        NULL,
775:                                        NULL,
776:                                        /*119*/ NULL,
777:                                        NULL,
778:                                        NULL,
779:                                        NULL,
780:                                        NULL,
781:                                        /*124*/ NULL,
782:                                        NULL,
783:                                        NULL,
784:                                        NULL,
785:                                        NULL,
786:                                        /*129*/ NULL,
787:                                        NULL,
788:                                        NULL,
789:                                        NULL,
790:                                        NULL,
791:                                        /*134*/ NULL,
792:                                        NULL,
793:                                        NULL,
794:                                        NULL,
795:                                        NULL,
796:                                        /*139*/ NULL,
797:                                        NULL,
798:                                        NULL,
799:                                        NULL,
800:                                        NULL};

802: /*@C
803:   MatBlockMatSetPreallocation - For good matrix assembly performance
804:   the user should preallocate the matrix storage by setting the parameter nz
805:   (or the array nnz).  By setting these parameters accurately, performance
806:   during matrix assembly can be increased by more than a factor of 50.

808:   Collective

810:   Input Parameters:
811: + B   - The matrix
812: . bs  - size of each block in matrix
 813: . nz  - number of nonzero blocks per block row (same for all block rows)
 814: - nnz - array containing the number of nonzero blocks in the various block rows
 815:          (possibly different for each block row) or `NULL`

817:   Level: intermediate

819:   Notes:
820:   If `nnz` is given then `nz` is ignored

822:   Specify the preallocated storage with either `nz` or `nnz` (not both).
823:   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
824:   allocation.

826: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
827: @*/
828: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
829: {
830:   PetscFunctionBegin;
831:   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
832:   PetscFunctionReturn(PETSC_SUCCESS);
833: }
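
/*
   A minimal usage sketch for MatBlockMatSetPreallocation() (illustrative only, not part
   of this file).  The sizes m and n, the block size bs, and the per-block-row length
   array lens[] (with m/bs entries) are assumed to be supplied by the caller; note that
   the current implementation requires the nnz array (here lens), see
   MatBlockMatSetPreallocation_BlockMat().

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_SELF, &A));
     PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATBLOCKMAT));
     PetscCall(MatBlockMatSetPreallocation(A, bs, 0, lens));
     ... set entries with MatSetValues() on scalar indices, then ...
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/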

835: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
836: {
837:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
838:   PetscInt      i;

840:   PetscFunctionBegin;
841:   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
842:   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
843:   PetscCall(PetscLayoutSetUp(A->rmap));
844:   PetscCall(PetscLayoutSetUp(A->cmap));
845:   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

847:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
848:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
849:   if (nnz) {
850:     for (i = 0; i < A->rmap->n / bs; i++) {
851:       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
852:       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
853:     }
854:   }
855:   bmat->mbs = A->rmap->n / bs;

857:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
858:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
859:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

861:   if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
 862:   PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
863:   nz = 0;
864:   for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
865:     bmat->imax[i] = nnz[i];
866:     nz += nnz[i];
867:   }

869:   /* bmat->ilen will count nonzeros in each row so far. */
870:   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

872:   /* allocate the matrix space */
873:   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
874:   PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
875:   PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
876:   PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
877:   bmat->free_a  = PETSC_TRUE;
878:   bmat->free_ij = PETSC_TRUE;

880:   bmat->i[0] = 0;
881:   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];

883:   bmat->nz            = 0;
884:   bmat->maxnz         = nz;
885:   A->info.nz_unneeded = (double)bmat->maxnz;
886:   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
887:   PetscFunctionReturn(PETSC_SUCCESS);
888: }

890: /*MC
891:    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
892:                  consisting of (usually) sparse blocks.

894:   Level: advanced

896: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
897: M*/

899: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
900: {
901:   Mat_BlockMat *b;

903:   PetscFunctionBegin;
904:   PetscCall(PetscNew(&b));
905:   A->data         = (void *)b;
906:   A->ops[0]       = MatOps_Values;
907:   A->assembled    = PETSC_TRUE;
908:   A->preallocated = PETSC_FALSE;
909:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

911:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
912:   PetscFunctionReturn(PETSC_SUCCESS);
913: }

915: /*@C
916:   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

918:   Collective

920:   Input Parameters:
921: + comm - MPI communicator
922: . m    - number of rows
923: . n    - number of columns
924: . bs   - size of each submatrix
 925: . nz   - expected maximum number of nonzero blocks in a row (use `PETSC_DEFAULT` if not known)
 926: - nnz  - expected number of nonzero blocks per block row if known (use `NULL` otherwise)

928:   Output Parameter:
929: . A - the matrix

931:   Level: intermediate

933:   Notes:
934:   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
935:   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

937:   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

939:   Developer Notes:
940:   I don't like the name, it is not `MATNESTMAT`

942: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
943: @*/
944: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
945: {
946:   PetscFunctionBegin;
947:   PetscCall(MatCreate(comm, A));
948:   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
949:   PetscCall(MatSetType(*A, MATBLOCKMAT));
950:   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
951:   PetscFunctionReturn(PETSC_SUCCESS);
952: }
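
/*
   A minimal usage sketch for MatCreateBlockMat() (illustrative only, not part of this
   file).  The sizes m, n, and bs, the block-row length array lens[], the scalar indices
   row and col with value val, and the vectors x and y are placeholders assumed to be
   provided by the caller; scalar entries set with MatSetValues() are routed into the
   per-block Mat objects by MatSetValues_BlockMat().

     Mat A;
     PetscCall(MatCreateBlockMat(PETSC_COMM_SELF, m, n, bs, 0, lens, &A));
     PetscCall(MatSetValues(A, 1, &row, 1, &col, &val, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatMult(A, x, y));
     PetscCall(MatDestroy(&A));
*/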