Actual source code: blockmat.c


  2: /*
  3:    This provides a matrix that consists of Mats
  4: */

  6: #include <petsc/private/matimpl.h>
  7: #include <../src/mat/impls/baij/seq/baij.h>

  9: typedef struct {
 10:   SEQAIJHEADER(Mat);
 11:   SEQBAIJHEADER;
 12:   Mat *diags;

 14:   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
 15: } Mat_BlockMat;

 17: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
 18: {
 19:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
 20:   PetscScalar       *x;
 21:   const Mat         *v;
 22:   const PetscScalar *b;
 23:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
 24:   const PetscInt    *idx;
 25:   IS                 row, col;
 26:   MatFactorInfo      info;
 27:   Vec                left = a->left, right = a->right, middle = a->middle;
 28:   Mat               *diag;

 30:   its = its * lits;

 37:   if (!a->diags) {
 38:     PetscMalloc1(mbs, &a->diags);
 39:     MatFactorInfoInitialize(&info);
 40:     for (i = 0; i < mbs; i++) {
 41:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col);
 42:       MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info);
 43:       MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info);
 44:       ISDestroy(&row);
 45:       ISDestroy(&col);
 46:     }
 47:     VecDuplicate(bb, &a->workb);
 48:   }
 49:   diag = a->diags;

 51:   VecSet(xx, 0.0);
 52:   VecGetArray(xx, &x);
 53:   /* copy right hand side because it must be modified during iteration */
 54:   VecCopy(bb, a->workb);
 55:   VecGetArrayRead(a->workb, &b);

 57:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 58:   while (its--) {
 59:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
 60:       for (i = 0; i < mbs; i++) {
 61:         n   = a->i[i + 1] - a->i[i] - 1;
 62:         idx = a->j + a->i[i] + 1;
 63:         v   = a->a + a->i[i] + 1;

 65:         VecSet(left, 0.0);
 66:         for (j = 0; j < n; j++) {
 67:           VecPlaceArray(right, x + idx[j] * bs);
 68:           MatMultAdd(v[j], right, left, left);
 69:           VecResetArray(right);
 70:         }
 71:         VecPlaceArray(right, b + i * bs);
 72:         VecAYPX(left, -1.0, right);
 73:         VecResetArray(right);

 75:         VecPlaceArray(right, x + i * bs);
 76:         MatSolve(diag[i], left, right);

 78:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 79:         for (j = 0; j < n; j++) {
 80:           MatMultTranspose(v[j], right, left);
 81:           VecPlaceArray(middle, b + idx[j] * bs);
 82:           VecAXPY(middle, -1.0, left);
 83:           VecResetArray(middle);
 84:         }
 85:         VecResetArray(right);
 86:       }
 87:     }
 88:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
 89:       for (i = mbs - 1; i >= 0; i--) {
 90:         n   = a->i[i + 1] - a->i[i] - 1;
 91:         idx = a->j + a->i[i] + 1;
 92:         v   = a->a + a->i[i] + 1;

 94:         VecSet(left, 0.0);
 95:         for (j = 0; j < n; j++) {
 96:           VecPlaceArray(right, x + idx[j] * bs);
 97:           MatMultAdd(v[j], right, left, left);
 98:           VecResetArray(right);
 99:         }
100:         VecPlaceArray(right, b + i * bs);
101:         VecAYPX(left, -1.0, right);
102:         VecResetArray(right);

104:         VecPlaceArray(right, x + i * bs);
105:         MatSolve(diag[i], left, right);
106:         VecResetArray(right);
107:       }
108:     }
109:   }
110:   VecRestoreArray(xx, &x);
111:   VecRestoreArrayRead(a->workb, &b);
112:   return 0;
113: }

115: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
116: {
117:   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
118:   PetscScalar       *x;
119:   const Mat         *v;
120:   const PetscScalar *b;
121:   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
122:   const PetscInt    *idx;
123:   IS                 row, col;
124:   MatFactorInfo      info;
125:   Vec                left = a->left, right = a->right;
126:   Mat               *diag;

128:   its = its * lits;

134:   if (!a->diags) {
135:     PetscMalloc1(mbs, &a->diags);
136:     MatFactorInfoInitialize(&info);
137:     for (i = 0; i < mbs; i++) {
138:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col);
139:       MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info);
140:       MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info);
141:       ISDestroy(&row);
142:       ISDestroy(&col);
143:     }
144:   }
145:   diag = a->diags;

147:   VecSet(xx, 0.0);
148:   VecGetArray(xx, &x);
149:   VecGetArrayRead(bb, &b);

151:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
152:   while (its--) {
153:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
154:       for (i = 0; i < mbs; i++) {
155:         n   = a->i[i + 1] - a->i[i];
156:         idx = a->j + a->i[i];
157:         v   = a->a + a->i[i];

159:         VecSet(left, 0.0);
160:         for (j = 0; j < n; j++) {
161:           if (idx[j] != i) {
162:             VecPlaceArray(right, x + idx[j] * bs);
163:             MatMultAdd(v[j], right, left, left);
164:             VecResetArray(right);
165:           }
166:         }
167:         VecPlaceArray(right, b + i * bs);
168:         VecAYPX(left, -1.0, right);
169:         VecResetArray(right);

171:         VecPlaceArray(right, x + i * bs);
172:         MatSolve(diag[i], left, right);
173:         VecResetArray(right);
174:       }
175:     }
176:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
177:       for (i = mbs - 1; i >= 0; i--) {
178:         n   = a->i[i + 1] - a->i[i];
179:         idx = a->j + a->i[i];
180:         v   = a->a + a->i[i];

182:         VecSet(left, 0.0);
183:         for (j = 0; j < n; j++) {
184:           if (idx[j] != i) {
185:             VecPlaceArray(right, x + idx[j] * bs);
186:             MatMultAdd(v[j], right, left, left);
187:             VecResetArray(right);
188:           }
189:         }
190:         VecPlaceArray(right, b + i * bs);
191:         VecAYPX(left, -1.0, right);
192:         VecResetArray(right);

194:         VecPlaceArray(right, x + i * bs);
195:         MatSolve(diag[i], left, right);
196:         VecResetArray(right);
197:       }
198:     }
199:   }
200:   VecRestoreArray(xx, &x);
201:   VecRestoreArrayRead(bb, &b);
202:   return 0;
203: }

205: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
206: {
207:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
208:   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
209:   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
210:   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
211:   PetscInt      ridx, cidx;
212:   PetscBool     roworiented = a->roworiented;
213:   MatScalar     value;
214:   Mat          *ap, *aa = a->a;

216:   for (k = 0; k < m; k++) { /* loop over added rows */
217:     row  = im[k];
218:     brow = row / bs;
219:     if (row < 0) continue;
221:     rp   = aj + ai[brow];
222:     ap   = aa + ai[brow];
223:     rmax = imax[brow];
224:     nrow = ailen[brow];
225:     low  = 0;
226:     high = nrow;
227:     for (l = 0; l < n; l++) { /* loop over added columns */
228:       if (in[l] < 0) continue;
230:       col  = in[l];
231:       bcol = col / bs;
232:       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
233:       ridx = row % bs;
234:       cidx = col % bs;
235:       if (roworiented) value = v[l + k * n];
236:       else value = v[k + l * m];

238:       if (col <= lastcol) low = 0;
239:       else high = nrow;
240:       lastcol = col;
241:       while (high - low > 7) {
242:         t = (low + high) / 2;
243:         if (rp[t] > bcol) high = t;
244:         else low = t;
245:       }
246:       for (i = low; i < high; i++) {
247:         if (rp[i] > bcol) break;
248:         if (rp[i] == bcol) goto noinsert1;
249:       }
250:       if (nonew == 1) goto noinsert1;
252:       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
253:       N = nrow++ - 1;
254:       high++;
255:       /* shift up all the later entries in this row */
256:       for (ii = N; ii >= i; ii--) {
257:         rp[ii + 1] = rp[ii];
258:         ap[ii + 1] = ap[ii];
259:       }
260:       if (N >= i) ap[i] = NULL;
261:       rp[i] = bcol;
262:       a->nz++;
263:       A->nonzerostate++;
264:     noinsert1:;
265:       if (!*(ap + i)) MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i);
266:       MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is);
267:       low = i;
268:     }
269:     ailen[brow] = nrow;
270:   }
271:   return 0;
272: }

274: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
275: {
276:   Mat                tmpA;
277:   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
278:   const PetscInt    *cols;
279:   const PetscScalar *values;
280:   PetscBool          flg = PETSC_FALSE, notdone;
281:   Mat_SeqAIJ        *a;
282:   Mat_BlockMat      *amat;

284:   /* force binary viewer to load .info file if it has not yet done so */
285:   PetscViewerSetUp(viewer);
286:   MatCreate(PETSC_COMM_SELF, &tmpA);
287:   MatSetType(tmpA, MATSEQAIJ);
288:   MatLoad_SeqAIJ(tmpA, viewer);

290:   MatGetLocalSize(tmpA, &m, &n);
291:   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
292:   PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL);
293:   PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL);
294:   PetscOptionsEnd();

296:   /* Determine number of nonzero blocks for each block row */
297:   a   = (Mat_SeqAIJ *)tmpA->data;
298:   mbs = m / bs;
299:   PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens);
300:   PetscArrayzero(lens, mbs);

302:   for (i = 0; i < mbs; i++) {
303:     for (j = 0; j < bs; j++) {
304:       ii[j]    = a->j + a->i[i * bs + j];
305:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
306:     }

308:     currentcol = -1;
309:     while (PETSC_TRUE) {
310:       notdone = PETSC_FALSE;
311:       nextcol = 1000000000;
312:       for (j = 0; j < bs; j++) {
313:         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
314:           ii[j]++;
315:           ilens[j]--;
316:         }
317:         if (ilens[j] > 0) {
318:           notdone = PETSC_TRUE;
319:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
320:         }
321:       }
322:       if (!notdone) break;
323:       if (!flg || (nextcol >= i)) lens[i]++;
324:       currentcol = nextcol;
325:     }
326:   }

328:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
329:   MatBlockMatSetPreallocation(newmat, bs, 0, lens);
330:   if (flg) MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE);
331:   amat = (Mat_BlockMat *)(newmat)->data;

333:   /* preallocate the submatrices */
334:   PetscMalloc1(bs, &llens);
335:   for (i = 0; i < mbs; i++) { /* loops for block rows */
336:     for (j = 0; j < bs; j++) {
337:       ii[j]    = a->j + a->i[i * bs + j];
338:       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
339:     }

341:     currentcol = 1000000000;
342:     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
343:       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
344:     }

346:     while (PETSC_TRUE) { /* loops over blocks in block row */
347:       notdone = PETSC_FALSE;
348:       nextcol = 1000000000;
349:       PetscArrayzero(llens, bs);
350:       for (j = 0; j < bs; j++) {                                /* loop over rows in block */
351:         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
352:           ii[j]++;
353:           ilens[j]--;
354:           llens[j]++;
355:         }
356:         if (ilens[j] > 0) {
357:           notdone = PETSC_TRUE;
358:           nextcol = PetscMin(nextcol, ii[j][0] / bs);
359:         }
360:       }
362:       if (!flg || currentcol >= i) {
363:         amat->j[cnt] = currentcol;
364:         MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++);
365:       }

367:       if (!notdone) break;
368:       currentcol = nextcol;
369:     }
370:     amat->ilen[i] = lens[i];
371:   }

373:   PetscFree3(lens, ii, ilens);
374:   PetscFree(llens);

376:   /* copy over the matrix, one row at a time */
377:   for (i = 0; i < m; i++) {
378:     MatGetRow(tmpA, i, &ncols, &cols, &values);
379:     MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES);
380:     MatRestoreRow(tmpA, i, &ncols, &cols, &values);
381:   }
382:   MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
383:   MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);
384:   return 0;
385: }

387: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
388: {
389:   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
390:   const char       *name;
391:   PetscViewerFormat format;

393:   PetscObjectGetName((PetscObject)A, &name);
394:   PetscViewerGetFormat(viewer, &format);
395:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
396:     PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz);
397:     if (A->symmetric == PETSC_BOOL3_TRUE) PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n");
398:   }
399:   return 0;
400: }

402: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
403: {
404:   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
405:   PetscInt      i;

407:   VecDestroy(&bmat->right);
408:   VecDestroy(&bmat->left);
409:   VecDestroy(&bmat->middle);
410:   VecDestroy(&bmat->workb);
411:   if (bmat->diags) {
412:     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) MatDestroy(&bmat->diags[i]);
413:   }
414:   if (bmat->a) {
415:     for (i = 0; i < bmat->nz; i++) MatDestroy(&bmat->a[i]);
416:   }
417:   MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i);
418:   PetscFree(mat->data);
419:   return 0;
420: }

422: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
423: {
424:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
425:   PetscScalar  *xx, *yy;
426:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
427:   Mat          *aa;

429:   /*
430:      Standard CSR multiply except each entry is a Mat
431:   */
432:   VecGetArray(x, &xx);

434:   VecSet(y, 0.0);
435:   VecGetArray(y, &yy);
436:   aj = bmat->j;
437:   aa = bmat->a;
438:   ii = bmat->i;
439:   for (i = 0; i < m; i++) {
440:     jrow = ii[i];
441:     VecPlaceArray(bmat->left, yy + bs * i);
442:     n = ii[i + 1] - jrow;
443:     for (j = 0; j < n; j++) {
444:       VecPlaceArray(bmat->right, xx + bs * aj[jrow]);
445:       MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left);
446:       VecResetArray(bmat->right);
447:       jrow++;
448:     }
449:     VecResetArray(bmat->left);
450:   }
451:   VecRestoreArray(x, &xx);
452:   VecRestoreArray(y, &yy);
453:   return 0;
454: }

456: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
457: {
458:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
459:   PetscScalar  *xx, *yy;
460:   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
461:   Mat          *aa;

463:   /*
464:      Standard CSR multiply except each entry is a Mat
465:   */
466:   VecGetArray(x, &xx);

468:   VecSet(y, 0.0);
469:   VecGetArray(y, &yy);
470:   aj = bmat->j;
471:   aa = bmat->a;
472:   ii = bmat->i;
473:   for (i = 0; i < m; i++) {
474:     jrow = ii[i];
475:     n    = ii[i + 1] - jrow;
476:     VecPlaceArray(bmat->left, yy + bs * i);
477:     VecPlaceArray(bmat->middle, xx + bs * i);
478:     /* if we ALWAYS required a diagonal entry then could remove this if test */
479:     if (aj[jrow] == i) {
480:       VecPlaceArray(bmat->right, xx + bs * aj[jrow]);
481:       MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left);
482:       VecResetArray(bmat->right);
483:       jrow++;
484:       n--;
485:     }
486:     for (j = 0; j < n; j++) {
487:       VecPlaceArray(bmat->right, xx + bs * aj[jrow]); /* upper triangular part */
488:       MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left);
489:       VecResetArray(bmat->right);

491:       VecPlaceArray(bmat->right, yy + bs * aj[jrow]); /* lower triangular part */
492:       MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right);
493:       VecResetArray(bmat->right);
494:       jrow++;
495:     }
496:     VecResetArray(bmat->left);
497:     VecResetArray(bmat->middle);
498:   }
499:   VecRestoreArray(x, &xx);
500:   VecRestoreArray(y, &yy);
501:   return 0;
502: }

504: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
505: {
506:   return 0;
507: }

509: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
510: {
511:   return 0;
512: }

514: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
515: {
516:   return 0;
517: }

519: /*
520:      Adds diagonal pointers to sparse matrix structure.
521: */
522: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
523: {
524:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
525:   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

527:   if (!a->diag) PetscMalloc1(mbs, &a->diag);
528:   for (i = 0; i < mbs; i++) {
529:     a->diag[i] = a->i[i + 1];
530:     for (j = a->i[i]; j < a->i[i + 1]; j++) {
531:       if (a->j[j] == i) {
532:         a->diag[i] = j;
533:         break;
534:       }
535:     }
536:   }
537:   return 0;
538: }

540: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
541: {
542:   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
543:   Mat_SeqAIJ   *c;
544:   PetscInt      i, k, first, step, lensi, nrows, ncols;
545:   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
546:   PetscScalar  *a_new;
547:   Mat           C, *aa = a->a;
548:   PetscBool     stride, equal;

550:   ISEqual(isrow, iscol, &equal);
552:   PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride);
554:   ISStrideGetInfo(iscol, &first, &step);

557:   ISGetLocalSize(isrow, &nrows);
558:   ncols = nrows;

560:   /* create submatrix */
561:   if (scall == MAT_REUSE_MATRIX) {
562:     PetscInt n_cols, n_rows;
563:     C = *B;
564:     MatGetSize(C, &n_rows, &n_cols);
566:     MatZeroEntries(C);
567:   } else {
568:     MatCreate(PetscObjectComm((PetscObject)A), &C);
569:     MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE);
570:     if (A->symmetric == PETSC_BOOL3_TRUE) MatSetType(C, MATSEQSBAIJ);
571:     else MatSetType(C, MATSEQAIJ);
572:     MatSeqAIJSetPreallocation(C, 0, ailen);
573:     MatSeqSBAIJSetPreallocation(C, 1, 0, ailen);
574:   }
575:   c = (Mat_SeqAIJ *)C->data;

577:   /* loop over rows inserting into submatrix */
578:   a_new = c->a;
579:   j_new = c->j;
580:   i_new = c->i;

582:   for (i = 0; i < nrows; i++) {
583:     lensi = ailen[i];
584:     for (k = 0; k < lensi; k++) {
585:       *j_new++ = *aj++;
586:       MatGetValue(*aa++, first, first, a_new++);
587:     }
588:     i_new[i + 1] = i_new[i] + lensi;
589:     c->ilen[i]   = lensi;
590:   }

592:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
593:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
594:   *B = C;
595:   return 0;
596: }

598: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
599: {
600:   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
601:   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
602:   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
603:   Mat          *aa = a->a, *ap;

605:   if (mode == MAT_FLUSH_ASSEMBLY) return 0;

607:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
608:   for (i = 1; i < m; i++) {
609:     /* move each row back by the amount of empty slots (fshift) before it*/
610:     fshift += imax[i - 1] - ailen[i - 1];
611:     rmax = PetscMax(rmax, ailen[i]);
612:     if (fshift) {
613:       ip = aj + ai[i];
614:       ap = aa + ai[i];
615:       N  = ailen[i];
616:       for (j = 0; j < N; j++) {
617:         ip[j - fshift] = ip[j];
618:         ap[j - fshift] = ap[j];
619:       }
620:     }
621:     ai[i] = ai[i - 1] + ailen[i - 1];
622:   }
623:   if (m) {
624:     fshift += imax[m - 1] - ailen[m - 1];
625:     ai[m] = ai[m - 1] + ailen[m - 1];
626:   }
627:   /* reset ilen and imax for each row */
628:   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
629:   a->nz = ai[m];
630:   for (i = 0; i < a->nz; i++) {
631:     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
632:     MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY);
633:     MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY);
634:   }
635:   PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz);
636:   PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs);
637:   PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax);

639:   A->info.mallocs += a->reallocs;
640:   a->reallocs         = 0;
641:   A->info.nz_unneeded = (double)fshift;
642:   a->rmax             = rmax;
643:   MatMarkDiagonal_BlockMat(A);
644:   return 0;
645: }

647: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
648: {
649:   if (opt == MAT_SYMMETRIC && flg) {
650:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
651:     A->ops->mult = MatMult_BlockMat_Symmetric;
652:   } else {
653:     PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]);
654:   }
655:   return 0;
656: }

658: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
659:                                        NULL,
660:                                        NULL,
661:                                        MatMult_BlockMat,
662:                                        /*  4*/ MatMultAdd_BlockMat,
663:                                        MatMultTranspose_BlockMat,
664:                                        MatMultTransposeAdd_BlockMat,
665:                                        NULL,
666:                                        NULL,
667:                                        NULL,
668:                                        /* 10*/ NULL,
669:                                        NULL,
670:                                        NULL,
671:                                        MatSOR_BlockMat,
672:                                        NULL,
673:                                        /* 15*/ NULL,
674:                                        NULL,
675:                                        NULL,
676:                                        NULL,
677:                                        NULL,
678:                                        /* 20*/ NULL,
679:                                        MatAssemblyEnd_BlockMat,
680:                                        MatSetOption_BlockMat,
681:                                        NULL,
682:                                        /* 24*/ NULL,
683:                                        NULL,
684:                                        NULL,
685:                                        NULL,
686:                                        NULL,
687:                                        /* 29*/ NULL,
688:                                        NULL,
689:                                        NULL,
690:                                        NULL,
691:                                        NULL,
692:                                        /* 34*/ NULL,
693:                                        NULL,
694:                                        NULL,
695:                                        NULL,
696:                                        NULL,
697:                                        /* 39*/ NULL,
698:                                        NULL,
699:                                        NULL,
700:                                        NULL,
701:                                        NULL,
702:                                        /* 44*/ NULL,
703:                                        NULL,
704:                                        MatShift_Basic,
705:                                        NULL,
706:                                        NULL,
707:                                        /* 49*/ NULL,
708:                                        NULL,
709:                                        NULL,
710:                                        NULL,
711:                                        NULL,
712:                                        /* 54*/ NULL,
713:                                        NULL,
714:                                        NULL,
715:                                        NULL,
716:                                        NULL,
717:                                        /* 59*/ MatCreateSubMatrix_BlockMat,
718:                                        MatDestroy_BlockMat,
719:                                        MatView_BlockMat,
720:                                        NULL,
721:                                        NULL,
722:                                        /* 64*/ NULL,
723:                                        NULL,
724:                                        NULL,
725:                                        NULL,
726:                                        NULL,
727:                                        /* 69*/ NULL,
728:                                        NULL,
729:                                        NULL,
730:                                        NULL,
731:                                        NULL,
732:                                        /* 74*/ NULL,
733:                                        NULL,
734:                                        NULL,
735:                                        NULL,
736:                                        NULL,
737:                                        /* 79*/ NULL,
738:                                        NULL,
739:                                        NULL,
740:                                        NULL,
741:                                        MatLoad_BlockMat,
742:                                        /* 84*/ NULL,
743:                                        NULL,
744:                                        NULL,
745:                                        NULL,
746:                                        NULL,
747:                                        /* 89*/ NULL,
748:                                        NULL,
749:                                        NULL,
750:                                        NULL,
751:                                        NULL,
752:                                        /* 94*/ NULL,
753:                                        NULL,
754:                                        NULL,
755:                                        NULL,
756:                                        NULL,
757:                                        /* 99*/ NULL,
758:                                        NULL,
759:                                        NULL,
760:                                        NULL,
761:                                        NULL,
762:                                        /*104*/ NULL,
763:                                        NULL,
764:                                        NULL,
765:                                        NULL,
766:                                        NULL,
767:                                        /*109*/ NULL,
768:                                        NULL,
769:                                        NULL,
770:                                        NULL,
771:                                        NULL,
772:                                        /*114*/ NULL,
773:                                        NULL,
774:                                        NULL,
775:                                        NULL,
776:                                        NULL,
777:                                        /*119*/ NULL,
778:                                        NULL,
779:                                        NULL,
780:                                        NULL,
781:                                        NULL,
782:                                        /*124*/ NULL,
783:                                        NULL,
784:                                        NULL,
785:                                        NULL,
786:                                        NULL,
787:                                        /*129*/ NULL,
788:                                        NULL,
789:                                        NULL,
790:                                        NULL,
791:                                        NULL,
792:                                        /*134*/ NULL,
793:                                        NULL,
794:                                        NULL,
795:                                        NULL,
796:                                        NULL,
797:                                        /*139*/ NULL,
798:                                        NULL,
799:                                        NULL,
800:                                        NULL,
801:                                        NULL,
802:                                        /*144*/ NULL,
803:                                        NULL,
804:                                        NULL,
805:                                        NULL,
806:                                        NULL,
807:                                        NULL,
808:                                        /*150*/ NULL};

810: /*@C
811:    MatBlockMatSetPreallocation - For good matrix assembly performance
812:    the user should preallocate the matrix storage by setting the parameter nz
813:    (or the array nnz).  By setting these parameters accurately, performance
814:    during matrix assembly can be increased by more than a factor of 50.

816:    Collective

818:    Input Parameters:
819: +  B - The matrix
820: .  bs - size of each block in matrix
821: .  nz - number of nonzeros per block row (same for all rows)
822: -  nnz - array containing the number of nonzeros in the various block rows
823:          (possibly different for each row) or NULL

825:    Notes:
826:      If nnz is given then nz is ignored

828:    Specify the preallocated storage with either nz or nnz (not both).
829:    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
830:    allocation.  For large problems you MUST preallocate memory or you
831:    will get TERRIBLE performance, see the users' manual chapter on matrices.

833:    Level: intermediate

835: .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
836: @*/
837: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
838: {
839:   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
840:   return 0;
841: }

843: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
844: {
845:   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
846:   PetscInt      i;

848:   PetscLayoutSetBlockSize(A->rmap, bs);
849:   PetscLayoutSetBlockSize(A->cmap, bs);
850:   PetscLayoutSetUp(A->rmap);
851:   PetscLayoutSetUp(A->cmap);
852:   PetscLayoutGetBlockSize(A->rmap, &bs);

854:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
856:   if (nnz) {
857:     for (i = 0; i < A->rmap->n / bs; i++) {
860:     }
861:   }
862:   bmat->mbs = A->rmap->n / bs;

864:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right);
865:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle);
866:   VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left);

868:   if (!bmat->imax) { PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen); }
869:   if (PetscLikely(nnz)) {
870:     nz = 0;
871:     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
872:       bmat->imax[i] = nnz[i];
873:       nz += nnz[i];
874:     }
875:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

877:   /* bmat->ilen will count nonzeros in each row so far. */
878:   PetscArrayzero(bmat->ilen, bmat->mbs);

880:   /* allocate the matrix space */
881:   MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i);
882:   PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i);
883:   bmat->i[0] = 0;
884:   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
885:   bmat->singlemalloc = PETSC_TRUE;
886:   bmat->free_a       = PETSC_TRUE;
887:   bmat->free_ij      = PETSC_TRUE;

889:   bmat->nz            = 0;
890:   bmat->maxnz         = nz;
891:   A->info.nz_unneeded = (double)bmat->maxnz;
892:   MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
893:   return 0;
894: }

896: /*MC
897:    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
898:                  consisting of (usually) sparse blocks.

900:   Level: advanced

902: .seealso: `MatCreateBlockMat()`
903: M*/

905: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
906: {
907:   Mat_BlockMat *b;

909:   PetscNew(&b);
910:   A->data = (void *)b;
911:   PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps));

913:   A->assembled    = PETSC_TRUE;
914:   A->preallocated = PETSC_FALSE;
915:   PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT);

917:   PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat);
918:   return 0;
919: }

921: /*@C
922:    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

924:   Collective

926:    Input Parameters:
927: +  comm - MPI communicator
928: .  m - number of rows
929: .  n  - number of columns
930: .  bs - size of each submatrix
931: .  nz  - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
932: -  nnz - expected number of nonzers per block row if known (use NULL otherwise)

934:    Output Parameter:
935: .  A - the matrix

937:    Level: intermediate

939:    Notes:
940:     Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
941:    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

943:    For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

945:    Developer Note:
946:    I don't like the name, it is not `MATNESTMAT`

948: .seealso: `MATBLOCKMAT`, `MatCreateNest()`
949: @*/
950: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
951: {
952:   MatCreate(comm, A);
953:   MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
954:   MatSetType(*A, MATBLOCKMAT);
955:   MatBlockMatSetPreallocation(*A, bs, nz, nnz);
956:   return 0;
957: }