Actual source code: blockmat.c

  1: /*
  2:    This provides a matrix that consists of Mats
  3: */

  5: #include <petsc/private/matimpl.h>
  6: #include <../src/mat/impls/baij/seq/baij.h>

  8: typedef struct {
  9:   SEQAIJHEADER(Mat);
 10:   SEQBAIJHEADER;
 11:   Mat *diags;

 13:   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
 14: } Mat_BlockMat;

 16: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
 17: {
 18:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
 19:   PetscScalar       *x;
 20:   const Mat         *v;
 21:   const PetscScalar *b;
 22:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
 23:   const PetscInt    *idx;
 24:   IS                 row, col;
 25:   MatFactorInfo      info;
 26:   Vec                left = a->left, right = a->right, middle = a->middle;
 27:   Mat               *diag;

 29:   PetscFunctionBegin;
 30:   its = its * lits;
 31:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
 32:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
 33:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
 34:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
 35:   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

 37:   if (!a->diags) {
 38:     PetscCall(PetscMalloc1(mbs, &a->diags));
 39:     PetscCall(MatFactorInfoInitialize(&info));
 40:     for (i = 0; i < mbs; i++) {
 41:       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
 42:       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
 43:       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
 44:       PetscCall(ISDestroy(&row));
 45:       PetscCall(ISDestroy(&col));
 46:     }
 47:     PetscCall(VecDuplicate(bb, &a->workb));
 48:   }
 49:   diag = a->diags;

 51:   PetscCall(VecSet(xx, 0.0));
 52:   PetscCall(VecGetArray(xx, &x));
 53:   /* copy right-hand side because it must be modified during iteration */
 54:   PetscCall(VecCopy(bb, a->workb));
 55:   PetscCall(VecGetArrayRead(a->workb, &b));

 57:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 58:   while (its--) {
 59:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
 60:       for (i = 0; i < mbs; i++) {
 61:         n   = a->i[i + 1] - a->i[i] - 1;
 62:         idx = a->j + a->i[i] + 1;
 63:         v   = a->a + a->i[i] + 1;

 65:         PetscCall(VecSet(left, 0.0));
 66:         for (j = 0; j < n; j++) {
 67:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
 68:           PetscCall(MatMultAdd(v[j], right, left, left));
 69:           PetscCall(VecResetArray(right));
 70:         }
 71:         PetscCall(VecPlaceArray(right, b + i * bs));
 72:         PetscCall(VecAYPX(left, -1.0, right));
 73:         PetscCall(VecResetArray(right));

 75:         PetscCall(VecPlaceArray(right, x + i * bs));
 76:         PetscCall(MatSolve(diag[i], left, right));

 78:         /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
 79:         for (j = 0; j < n; j++) {
 80:           PetscCall(MatMultTranspose(v[j], right, left));
 81:           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
 82:           PetscCall(VecAXPY(middle, -1.0, left));
 83:           PetscCall(VecResetArray(middle));
 84:         }
 85:         PetscCall(VecResetArray(right));
 86:       }
 87:     }
 88:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
 89:       for (i = mbs - 1; i >= 0; i--) {
 90:         n   = a->i[i + 1] - a->i[i] - 1;
 91:         idx = a->j + a->i[i] + 1;
 92:         v   = a->a + a->i[i] + 1;

 94:         PetscCall(VecSet(left, 0.0));
 95:         for (j = 0; j < n; j++) {
 96:           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
 97:           PetscCall(MatMultAdd(v[j], right, left, left));
 98:           PetscCall(VecResetArray(right));
 99:         }
100:         PetscCall(VecPlaceArray(right, b + i * bs));
101:         PetscCall(VecAYPX(left, -1.0, right));
102:         PetscCall(VecResetArray(right));

104:         PetscCall(VecPlaceArray(right, x + i * bs));
105:         PetscCall(MatSolve(diag[i], left, right));
106:         PetscCall(VecResetArray(right));
107:       }
108:     }
109:   }
110:   PetscCall(VecRestoreArray(xx, &x));
111:   PetscCall(VecRestoreArrayRead(a->workb, &b));
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }

115: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
116: {
117:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
118:   PetscScalar       *x;
119:   const Mat         *v;
120:   const PetscScalar *b;
121:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
122:   const PetscInt    *idx;
123:   IS                 row, col;
124:   MatFactorInfo      info;
125:   Vec                left = a->left, right = a->right;
126:   Mat               *diag;

128:   PetscFunctionBegin;
129:   its = its * lits;
130:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
131:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
132:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
133:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

135:   if (!a->diags) {
136:     PetscCall(PetscMalloc1(mbs, &a->diags));
137:     PetscCall(MatFactorInfoInitialize(&info));
138:     for (i = 0; i < mbs; i++) {
139:       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
140:       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
141:       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
142:       PetscCall(ISDestroy(&row));
143:       PetscCall(ISDestroy(&col));
144:     }
145:   }
146:   diag = a->diags;

148:   PetscCall(VecSet(xx, 0.0));
149:   PetscCall(VecGetArray(xx, &x));
150:   PetscCall(VecGetArrayRead(bb, &b));

152:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
153:   while (its--) {
154:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
155:       for (i = 0; i < mbs; i++) {
156:         n   = a->i[i + 1] - a->i[i];
157:         idx = a->j + a->i[i];
158:         v   = a->a + a->i[i];

160:         PetscCall(VecSet(left, 0.0));
161:         for (j = 0; j < n; j++) {
162:           if (idx[j] != i) {
163:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
164:             PetscCall(MatMultAdd(v[j], right, left, left));
165:             PetscCall(VecResetArray(right));
166:           }
167:         }
168:         PetscCall(VecPlaceArray(right, b + i * bs));
169:         PetscCall(VecAYPX(left, -1.0, right));
170:         PetscCall(VecResetArray(right));

172:         PetscCall(VecPlaceArray(right, x + i * bs));
173:         PetscCall(MatSolve(diag[i], left, right));
174:         PetscCall(VecResetArray(right));
175:       }
176:     }
177:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178:       for (i = mbs - 1; i >= 0; i--) {
179:         n   = a->i[i + 1] - a->i[i];
180:         idx = a->j + a->i[i];
181:         v   = a->a + a->i[i];

183:         PetscCall(VecSet(left, 0.0));
184:         for (j = 0; j < n; j++) {
185:           if (idx[j] != i) {
186:             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
187:             PetscCall(MatMultAdd(v[j], right, left, left));
188:             PetscCall(VecResetArray(right));
189:           }
190:         }
191:         PetscCall(VecPlaceArray(right, b + i * bs));
192:         PetscCall(VecAYPX(left, -1.0, right));
193:         PetscCall(VecResetArray(right));

195:         PetscCall(VecPlaceArray(right, x + i * bs));
196:         PetscCall(MatSolve(diag[i], left, right));
197:         PetscCall(VecResetArray(right));
198:       }
199:     }
200:   }
201:   PetscCall(VecRestoreArray(xx, &x));
202:   PetscCall(VecRestoreArrayRead(bb, &b));
203:   PetscFunctionReturn(PETSC_SUCCESS);
204: }

206: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
207: {
208:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
209:   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
210:   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
211:   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
212:   PetscInt      ridx, cidx;
213:   PetscBool     roworiented = a->roworiented;
214:   MatScalar     value;
215:   Mat          *ap, *aa = a->a;

217:   PetscFunctionBegin;
218:   for (k = 0; k < m; k++) { /* loop over added rows */
219:     row  = im[k];
220:     brow = row / bs;
221:     if (row < 0) continue;
222:     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
223:     rp   = aj + ai[brow];
224:     ap   = aa + ai[brow];
225:     rmax = imax[brow];
226:     nrow = ailen[brow];
227:     low  = 0;
228:     high = nrow;
229:     for (l = 0; l < n; l++) { /* loop over added columns */
230:       if (in[l] < 0) continue;
231:       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
232:       col  = in[l];
233:       bcol = col / bs;
234:       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
235:       ridx = row % bs;
236:       cidx = col % bs;
237:       if (roworiented) value = v[l + k * n];
238:       else value = v[k + l * m];

240:       if (col <= lastcol) low = 0;
241:       else high = nrow;
242:       lastcol = col;
243:       while (high - low > 7) {
244:         t = (low + high) / 2;
245:         if (rp[t] > bcol) high = t;
246:         else low = t;
247:       }
248:       for (i = low; i < high; i++) {
249:         if (rp[i] > bcol) break;
250:         if (rp[i] == bcol) goto noinsert1;
251:       }
252:       if (nonew == 1) goto noinsert1;
253:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
254:       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
255:       N = nrow++ - 1;
256:       high++;
257:       /* shift up all the later entries in this row */
258:       for (ii = N; ii >= i; ii--) {
259:         rp[ii + 1] = rp[ii];
260:         ap[ii + 1] = ap[ii];
261:       }
262:       if (N >= i) ap[i] = NULL;
263:       rp[i] = bcol;
264:       a->nz++;
265:     noinsert1:;
266:       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
267:       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
268:       low = i;
269:     }
270:     ailen[brow] = nrow;
271:   }
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
276: {
277:   Mat                tmpA;
278:   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
279:   const PetscInt    *cols;
280:   const PetscScalar *values;
281:   PetscBool          flg = PETSC_FALSE, notdone;
282:   Mat_SeqAIJ        *a;
283:   Mat_BlockMat      *amat;

285:   PetscFunctionBegin;
286:   /* force binary viewer to load .info file if it has not yet done so */
287:   PetscCall(PetscViewerSetUp(viewer));
288:   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
289:   PetscCall(MatSetType(tmpA, MATSEQAIJ));
290:   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));

292:   PetscCall(MatGetLocalSize(tmpA, &m, &n));
293:   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
294:   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
295:   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
296:   PetscOptionsEnd();

298:   /* Determine number of nonzero blocks for each block row */
299:   a   = (Mat_SeqAIJ *)tmpA->data;
300:   mbs = m / bs;
301:   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
302:   PetscCall(PetscArrayzero(lens, mbs));

304:   for (i = 0; i < mbs; i++) {
305:     for (j = 0; j < bs; j++) {
306:       ii[j]    = a->j + a->i[i * bs + j];
307:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
308:     }

310:     currentcol = -1;
311:     while (PETSC_TRUE) {
312:       notdone = PETSC_FALSE;
313:       nextcol = 1000000000;
314:       for (j = 0; j < bs; j++) {
315:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
316:           ii[j]++;
317:           ilens[j]--;
318:         }
319:         if (ilens[j] > 0) {
320:           notdone = PETSC_TRUE;
321:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
322:         }
323:       }
324:       if (!notdone) break;
325:       if (!flg || (nextcol >= i)) lens[i]++;
326:       currentcol = nextcol;
327:     }
328:   }

330:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
331:   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
332:   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
333:   amat = (Mat_BlockMat *)(newmat)->data;

335:   /* preallocate the submatrices */
336:   PetscCall(PetscMalloc1(bs, &llens));
337:   for (i = 0; i < mbs; i++) { /* loops for block rows */
338:     for (j = 0; j < bs; j++) {
339:       ii[j]    = a->j + a->i[i * bs + j];
340:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
341:     }

343:     currentcol = 1000000000;
344:     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
345:       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
346:     }

348:     while (PETSC_TRUE) { /* loops over blocks in block row */
349:       notdone = PETSC_FALSE;
350:       nextcol = 1000000000;
351:       PetscCall(PetscArrayzero(llens, bs));
352:       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
353:         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
354:           ii[j]++;
355:           ilens[j]--;
356:           llens[j]++;
357:         }
358:         if (ilens[j] > 0) {
359:           notdone = PETSC_TRUE;
360:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
361:         }
362:       }
363:       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
364:       if (!flg || currentcol >= i) {
365:         amat->j[cnt] = currentcol;
366:         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
367:       }

369:       if (!notdone) break;
370:       currentcol = nextcol;
371:     }
372:     amat->ilen[i] = lens[i];
373:   }

375:   PetscCall(PetscFree3(lens, ii, ilens));
376:   PetscCall(PetscFree(llens));

378:   /* copy over the matrix, one row at a time */
379:   for (i = 0; i < m; i++) {
380:     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
381:     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
382:     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
383:   }
384:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
385:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
386:   PetscFunctionReturn(PETSC_SUCCESS);
387: }

389: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
390: {
391:   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
392:   const char       *name;
393:   PetscViewerFormat format;

395:   PetscFunctionBegin;
396:   PetscCall(PetscObjectGetName((PetscObject)A, &name));
397:   PetscCall(PetscViewerGetFormat(viewer, &format));
398:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
399:     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
400:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
401:   }
402:   PetscFunctionReturn(PETSC_SUCCESS);
403: }

405: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
406: {
407:   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
408:   PetscInt      i;

410:   PetscFunctionBegin;
411:   PetscCall(VecDestroy(&bmat->right));
412:   PetscCall(VecDestroy(&bmat->left));
413:   PetscCall(VecDestroy(&bmat->middle));
414:   PetscCall(VecDestroy(&bmat->workb));
415:   if (bmat->diags) {
416:     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
417:   }
418:   if (bmat->a) {
419:     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
420:   }
421:   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
422:   PetscCall(PetscFree(mat->data));
423:   PetscFunctionReturn(PETSC_SUCCESS);
424: }

426: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
427: {
428:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
429:   PetscScalar  *xx, *yy;
430:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
431:   Mat          *aa;

433:   PetscFunctionBegin;
434:   /*
435:      Standard CSR multiply except each entry is a Mat
436:   */
437:   PetscCall(VecGetArray(x, &xx));

439:   PetscCall(VecSet(y, 0.0));
440:   PetscCall(VecGetArray(y, &yy));
441:   aj = bmat->j;
442:   aa = bmat->a;
443:   ii = bmat->i;
444:   for (i = 0; i < m; i++) {
445:     jrow = ii[i];
446:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
447:     n = ii[i + 1] - jrow;
448:     for (j = 0; j < n; j++) {
449:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
450:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
451:       PetscCall(VecResetArray(bmat->right));
452:       jrow++;
453:     }
454:     PetscCall(VecResetArray(bmat->left));
455:   }
456:   PetscCall(VecRestoreArray(x, &xx));
457:   PetscCall(VecRestoreArray(y, &yy));
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

461: static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
462: {
463:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
464:   PetscScalar  *xx, *yy;
465:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
466:   Mat          *aa;

468:   PetscFunctionBegin;
469:   /*
470:      Standard CSR multiply except each entry is a Mat
471:   */
472:   PetscCall(VecGetArray(x, &xx));

474:   PetscCall(VecSet(y, 0.0));
475:   PetscCall(VecGetArray(y, &yy));
476:   aj = bmat->j;
477:   aa = bmat->a;
478:   ii = bmat->i;
479:   for (i = 0; i < m; i++) {
480:     jrow = ii[i];
481:     n    = ii[i + 1] - jrow;
482:     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
483:     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
484:     /* if we ALWAYS required a diagonal entry then could remove this if test */
485:     if (aj[jrow] == i) {
486:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
487:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
488:       PetscCall(VecResetArray(bmat->right));
489:       jrow++;
490:       n--;
491:     }
492:     for (j = 0; j < n; j++) {
493:       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
494:       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
495:       PetscCall(VecResetArray(bmat->right));

497:       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
498:       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
499:       PetscCall(VecResetArray(bmat->right));
500:       jrow++;
501:     }
502:     PetscCall(VecResetArray(bmat->left));
503:     PetscCall(VecResetArray(bmat->middle));
504:   }
505:   PetscCall(VecRestoreArray(x, &xx));
506:   PetscCall(VecRestoreArray(y, &yy));
507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }

510: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
511: {
512:   PetscFunctionBegin;
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
517: {
518:   PetscFunctionBegin;
519:   PetscFunctionReturn(PETSC_SUCCESS);
520: }

522: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
523: {
524:   PetscFunctionBegin;
525:   PetscFunctionReturn(PETSC_SUCCESS);
526: }

528: /*
529:      Adds diagonal pointers to sparse matrix structure.
530: */
531: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
532: {
533:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
534:   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

536:   PetscFunctionBegin;
537:   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
538:   for (i = 0; i < mbs; i++) {
539:     a->diag[i] = a->i[i + 1];
540:     for (j = a->i[i]; j < a->i[i + 1]; j++) {
541:       if (a->j[j] == i) {
542:         a->diag[i] = j;
543:         break;
544:       }
545:     }
546:   }
547:   PetscFunctionReturn(PETSC_SUCCESS);
548: }

550: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
551: {
552:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
553:   Mat_SeqAIJ   *c;
554:   PetscInt      i, k, first, step, lensi, nrows, ncols;
555:   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
556:   PetscScalar  *a_new;
557:   Mat           C, *aa = a->a;
558:   PetscBool     stride, equal;

560:   PetscFunctionBegin;
561:   PetscCall(ISEqual(isrow, iscol, &equal));
562:   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
563:   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
564:   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
565:   PetscCall(ISStrideGetInfo(iscol, &first, &step));
566:   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

568:   PetscCall(ISGetLocalSize(isrow, &nrows));
569:   ncols = nrows;

571:   /* create submatrix */
572:   if (scall == MAT_REUSE_MATRIX) {
573:     PetscInt n_cols, n_rows;
574:     C = *B;
575:     PetscCall(MatGetSize(C, &n_rows, &n_cols));
576:     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
577:     PetscCall(MatZeroEntries(C));
578:   } else {
579:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
580:     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
581:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
582:     else PetscCall(MatSetType(C, MATSEQAIJ));
583:     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
584:     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
585:   }
586:   c = (Mat_SeqAIJ *)C->data;

588:   /* loop over rows inserting into submatrix */
589:   a_new = c->a;
590:   j_new = c->j;
591:   i_new = c->i;

593:   for (i = 0; i < nrows; i++) {
594:     lensi = ailen[i];
595:     for (k = 0; k < lensi; k++) {
596:       *j_new++ = *aj++;
597:       PetscCall(MatGetValue(*aa++, first, first, a_new++));
598:     }
599:     i_new[i + 1] = i_new[i] + lensi;
600:     c->ilen[i]   = lensi;
601:   }

603:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
604:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
605:   *B = C;
606:   PetscFunctionReturn(PETSC_SUCCESS);
607: }

609: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
610: {
611:   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
612:   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
613:   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
614:   Mat          *aa = a->a, *ap;

616:   PetscFunctionBegin;
617:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

619:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
620:   for (i = 1; i < m; i++) {
621:     /* move each row back by the amount of empty slots (fshift) before it*/
622:     fshift += imax[i - 1] - ailen[i - 1];
623:     rmax = PetscMax(rmax, ailen[i]);
624:     if (fshift) {
625:       ip = aj + ai[i];
626:       ap = aa + ai[i];
627:       N  = ailen[i];
628:       for (j = 0; j < N; j++) {
629:         ip[j - fshift] = ip[j];
630:         ap[j - fshift] = ap[j];
631:       }
632:     }
633:     ai[i] = ai[i - 1] + ailen[i - 1];
634:   }
635:   if (m) {
636:     fshift += imax[m - 1] - ailen[m - 1];
637:     ai[m] = ai[m - 1] + ailen[m - 1];
638:   }
639:   /* reset ilen and imax for each row */
640:   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
641:   a->nz = ai[m];
642:   for (i = 0; i < a->nz; i++) {
643:     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
644:     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
645:     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
646:   }
647:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
648:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
649:   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

651:   A->info.mallocs += a->reallocs;
652:   a->reallocs         = 0;
653:   A->info.nz_unneeded = (double)fshift;
654:   a->rmax             = rmax;
655:   PetscCall(MatMarkDiagonal_BlockMat(A));
656:   PetscFunctionReturn(PETSC_SUCCESS);
657: }

659: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
660: {
661:   PetscFunctionBegin;
662:   if (opt == MAT_SYMMETRIC && flg) {
663:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
664:     A->ops->mult = MatMult_BlockMat_Symmetric;
665:   } else {
666:     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
667:   }
668:   PetscFunctionReturn(PETSC_SUCCESS);
669: }

671: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
672:                                        NULL,
673:                                        NULL,
674:                                        MatMult_BlockMat,
675:                                        /*  4*/ MatMultAdd_BlockMat,
676:                                        MatMultTranspose_BlockMat,
677:                                        MatMultTransposeAdd_BlockMat,
678:                                        NULL,
679:                                        NULL,
680:                                        NULL,
681:                                        /* 10*/ NULL,
682:                                        NULL,
683:                                        NULL,
684:                                        MatSOR_BlockMat,
685:                                        NULL,
686:                                        /* 15*/ NULL,
687:                                        NULL,
688:                                        NULL,
689:                                        NULL,
690:                                        NULL,
691:                                        /* 20*/ NULL,
692:                                        MatAssemblyEnd_BlockMat,
693:                                        MatSetOption_BlockMat,
694:                                        NULL,
695:                                        /* 24*/ NULL,
696:                                        NULL,
697:                                        NULL,
698:                                        NULL,
699:                                        NULL,
700:                                        /* 29*/ NULL,
701:                                        NULL,
702:                                        NULL,
703:                                        NULL,
704:                                        NULL,
705:                                        /* 34*/ NULL,
706:                                        NULL,
707:                                        NULL,
708:                                        NULL,
709:                                        NULL,
710:                                        /* 39*/ NULL,
711:                                        NULL,
712:                                        NULL,
713:                                        NULL,
714:                                        NULL,
715:                                        /* 44*/ NULL,
716:                                        NULL,
717:                                        MatShift_Basic,
718:                                        NULL,
719:                                        NULL,
720:                                        /* 49*/ NULL,
721:                                        NULL,
722:                                        NULL,
723:                                        NULL,
724:                                        NULL,
725:                                        /* 54*/ NULL,
726:                                        NULL,
727:                                        NULL,
728:                                        NULL,
729:                                        NULL,
730:                                        /* 59*/ MatCreateSubMatrix_BlockMat,
731:                                        MatDestroy_BlockMat,
732:                                        MatView_BlockMat,
733:                                        NULL,
734:                                        NULL,
735:                                        /* 64*/ NULL,
736:                                        NULL,
737:                                        NULL,
738:                                        NULL,
739:                                        NULL,
740:                                        /* 69*/ NULL,
741:                                        NULL,
742:                                        NULL,
743:                                        NULL,
744:                                        NULL,
745:                                        /* 74*/ NULL,
746:                                        NULL,
747:                                        NULL,
748:                                        NULL,
749:                                        NULL,
750:                                        /* 79*/ NULL,
751:                                        NULL,
752:                                        NULL,
753:                                        NULL,
754:                                        MatLoad_BlockMat,
755:                                        /* 84*/ NULL,
756:                                        NULL,
757:                                        NULL,
758:                                        NULL,
759:                                        NULL,
760:                                        /* 89*/ NULL,
761:                                        NULL,
762:                                        NULL,
763:                                        NULL,
764:                                        NULL,
765:                                        /* 94*/ NULL,
766:                                        NULL,
767:                                        NULL,
768:                                        NULL,
769:                                        NULL,
770:                                        /* 99*/ NULL,
771:                                        NULL,
772:                                        NULL,
773:                                        NULL,
774:                                        NULL,
775:                                        /*104*/ NULL,
776:                                        NULL,
777:                                        NULL,
778:                                        NULL,
779:                                        NULL,
780:                                        /*109*/ NULL,
781:                                        NULL,
782:                                        NULL,
783:                                        NULL,
784:                                        NULL,
785:                                        /*114*/ NULL,
786:                                        NULL,
787:                                        NULL,
788:                                        NULL,
789:                                        NULL,
790:                                        /*119*/ NULL,
791:                                        NULL,
792:                                        NULL,
793:                                        NULL,
794:                                        NULL,
795:                                        /*124*/ NULL,
796:                                        NULL,
797:                                        NULL,
798:                                        NULL,
799:                                        NULL,
800:                                        /*129*/ NULL,
801:                                        NULL,
802:                                        NULL,
803:                                        NULL,
804:                                        NULL,
805:                                        /*134*/ NULL,
806:                                        NULL,
807:                                        NULL,
808:                                        NULL,
809:                                        NULL,
810:                                        /*139*/ NULL,
811:                                        NULL,
812:                                        NULL,
813:                                        NULL,
814:                                        NULL,
815:                                        /*144*/ NULL,
816:                                        NULL,
817:                                        NULL,
818:                                        NULL,
819:                                        NULL,
820:                                        NULL,
821:                                        /*150*/ NULL,
822:                                        NULL,
823:                                        NULL};

825: /*@C
826:   MatBlockMatSetPreallocation - For good matrix assembly performance
827:   the user should preallocate the matrix storage by setting the parameter nz
828:   (or the array nnz).  By setting these parameters accurately, performance
829:   during matrix assembly can be increased by more than a factor of 50.

831:   Collective

833:   Input Parameters:
834: + B   - The matrix
835: . bs  - size of each block in matrix
836: . nz  - number of nonzeros per block row (same for all rows)
837: - nnz - array containing the number of nonzeros in the various block rows
838:          (possibly different for each row) or `NULL`

840:   Level: intermediate

842:   Notes:
843:   If `nnz` is given then `nz` is ignored

845:   Specify the preallocated storage with either `nz` or `nnz` (not both).
846:   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
847:   allocation.

849: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
850: @*/
851: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
852: {
853:   PetscFunctionBegin;
854:   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
855:   PetscFunctionReturn(PETSC_SUCCESS);
856: }

858: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
859: {
860:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
861:   PetscInt      i;

863:   PetscFunctionBegin;
864:   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
865:   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
866:   PetscCall(PetscLayoutSetUp(A->rmap));
867:   PetscCall(PetscLayoutSetUp(A->cmap));
868:   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

870:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
871:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
872:   if (nnz) {
873:     for (i = 0; i < A->rmap->n / bs; i++) {
874:       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
875:       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
876:     }
877:   }
878:   bmat->mbs = A->rmap->n / bs;

880:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
881:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
882:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

884:   if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
885:   if (PetscLikely(nnz)) {
886:     nz = 0;
887:     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
888:       bmat->imax[i] = nnz[i];
889:       nz += nnz[i];
890:     }
891:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

893:   /* bmat->ilen will count nonzeros in each row so far. */
894:   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

896:   /* allocate the matrix space */
897:   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
898:   PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
899:   bmat->i[0] = 0;
900:   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
901:   bmat->singlemalloc = PETSC_TRUE;
902:   bmat->free_a       = PETSC_TRUE;
903:   bmat->free_ij      = PETSC_TRUE;

905:   bmat->nz            = 0;
906:   bmat->maxnz         = nz;
907:   A->info.nz_unneeded = (double)bmat->maxnz;
908:   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
909:   PetscFunctionReturn(PETSC_SUCCESS);
910: }

912: /*MC
913:    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
914:                  consisting of (usually) sparse blocks.

916:   Level: advanced

918: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
919: M*/

921: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
922: {
923:   Mat_BlockMat *b;

925:   PetscFunctionBegin;
926:   PetscCall(PetscNew(&b));
927:   A->data         = (void *)b;
928:   A->ops[0]       = MatOps_Values;
929:   A->assembled    = PETSC_TRUE;
930:   A->preallocated = PETSC_FALSE;
931:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

933:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
934:   PetscFunctionReturn(PETSC_SUCCESS);
935: }

937: /*@C
938:   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

940:   Collective

942:   Input Parameters:
943: + comm - MPI communicator
944: . m    - number of rows
945: . n    - number of columns
946: . bs   - size of each submatrix
947: . nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
948: - nnz  - expected number of nonzers per block row if known (use `NULL` otherwise)

950:   Output Parameter:
951: . A - the matrix

953:   Level: intermediate

955:   Notes:
956:   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
957:   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

959:   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

961:   Developer Notes:
962:   I don't like the name, it is not `MATNESTMAT`

964: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
965: @*/
966: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
967: {
968:   PetscFunctionBegin;
969:   PetscCall(MatCreate(comm, A));
970:   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
971:   PetscCall(MatSetType(*A, MATBLOCKMAT));
972:   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
973:   PetscFunctionReturn(PETSC_SUCCESS);
974: }