Actual source code: maij.c

  1: #include <../src/mat/impls/maij/maij.h>
  2: #include <../src/mat/utils/freespace.h>

  4: /*@
  5:   MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  7:   Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  9:   Input Parameter:
 10: . A - the `MATMAIJ` matrix

 12:   Output Parameter:
 13: . B - the `MATAIJ` matrix

 15:   Level: advanced

 17:   Note:
 18:   The reference count of the `MATAIJ` matrix is not increased, so you should not destroy it.
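
  Example Usage:
  A minimal sketch; `maij` is assumed to be an existing `MATMAIJ` matrix obtained from `MatCreateMAIJ()`:
.vb
  Mat aij;

  PetscCall(MatMAIJGetAIJ(maij, &aij));
  // aij is borrowed (its reference count is unchanged), so do not call MatDestroy() on it
.ve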

 20: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
 21: @*/
 22: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
 23: {
 24:   PetscBool ismpimaij, isseqmaij;

 26:   PetscFunctionBegin;
 27:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
 28:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
 29:   if (ismpimaij) {
 30:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

 32:     *B = b->A;
 33:   } else if (isseqmaij) {
 34:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 36:     *B = b->AIJ;
 37:   } else {
 38:     *B = A;
 39:   }
 40:   PetscFunctionReturn(PETSC_SUCCESS);
 41: }

 43: /*@
 44:   MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

 46:   Logically Collective

 48:   Input Parameters:
 49: + A   - the `MATMAIJ` matrix
 50: - dof - the block size for the new matrix

 52:   Output Parameter:
 53: . B - the new `MATMAIJ` matrix

 55:   Level: advanced
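
  Example Usage:
  A minimal sketch; `maij3` is assumed to be an existing `MATMAIJ` matrix with block size 3:
.vb
  Mat maij5;

  PetscCall(MatMAIJRedimension(maij3, 5, &maij5)); // same blockwise action, but with block size 5
  PetscCall(MatDestroy(&maij5));
.ve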

 57: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
 58: @*/
 59: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
 60: {
 61:   Mat Aij = NULL;

 63:   PetscFunctionBegin;
 64:   PetscValidLogicalCollectiveInt(A, dof, 2);
 65:   PetscCall(MatMAIJGetAIJ(A, &Aij));
 66:   PetscCall(MatCreateMAIJ(Aij, dof, B));
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 71: {
 72:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 74:   PetscFunctionBegin;
 75:   PetscCall(MatDestroy(&b->AIJ));
 76:   PetscCall(PetscFree(A->data));
 77:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
 78:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
 79:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: static PetscErrorCode MatSetUp_MAIJ(Mat A)
 84: {
 85:   PetscFunctionBegin;
 86:   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
 87: }

 89: static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
 90: {
 91:   Mat B;

 93:   PetscFunctionBegin;
 94:   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
 95:   PetscCall(MatView(B, viewer));
 96:   PetscCall(MatDestroy(&B));
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }

100: static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101: {
102:   Mat B;

104:   PetscFunctionBegin;
105:   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
106:   PetscCall(MatView(B, viewer));
107:   PetscCall(MatDestroy(&B));
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }

111: static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
112: {
113:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

115:   PetscFunctionBegin;
116:   PetscCall(MatDestroy(&b->AIJ));
117:   PetscCall(MatDestroy(&b->OAIJ));
118:   PetscCall(MatDestroy(&b->A));
119:   PetscCall(VecScatterDestroy(&b->ctx));
120:   PetscCall(VecDestroy(&b->w));
121:   PetscCall(PetscFree(A->data));
122:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
123:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
124:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
125:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
126:   PetscFunctionReturn(PETSC_SUCCESS);
127: }

129: /*MC
130:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations in
131:   multicomponent problems; each component is interpolated or restricted the same way, independently of the others.
132:   The matrix type is based on `MATSEQAIJ` for sequential matrices and `MATMPIAIJ` for distributed matrices.

134:   Operations provided:
135: .vb
136:     MatMult()
137:     MatMultTranspose()
138:     MatMultAdd()
139:     MatMultTransposeAdd()
140: .ve

142:   Level: advanced
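
  Example Usage:
  For operations beyond those listed above, one option is to convert to `MATAIJ` first (this is what
  `MatView()` and `MatCreateSubMatrix()` do internally for this type); a minimal sketch with a
  hypothetical `MATMAIJ` matrix `maij`:
.vb
  Mat aij;

  PetscCall(MatConvert(maij, MATAIJ, MAT_INITIAL_MATRIX, &aij));
  // use any MATAIJ operation on aij, then free it
  PetscCall(MatDestroy(&aij));
.ve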

144: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
145: M*/

147: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148: {
149:   Mat_MPIMAIJ *b;
150:   PetscMPIInt  size;

152:   PetscFunctionBegin;
153:   PetscCall(PetscNew(&b));
154:   A->data = (void *)b;

156:   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

158:   A->ops->setup = MatSetUp_MAIJ;

160:   b->AIJ  = NULL;
161:   b->dof  = 0;
162:   b->OAIJ = NULL;
163:   b->ctx  = NULL;
164:   b->w    = NULL;
165:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
166:   if (size == 1) {
167:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
168:   } else {
169:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
170:   }
171:   A->preallocated = PETSC_TRUE;
172:   A->assembled    = PETSC_TRUE;
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: #if PetscHasAttribute(always_inline)
177:   #define PETSC_FORCE_INLINE __attribute__((always_inline))
178: #else
179:   #define PETSC_FORCE_INLINE
180: #endif

182: #if defined(__clang__)
183:   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
184: #else
185:   #define PETSC_PRAGMA_UNROLL
186: #endif

188: enum {
189:   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
190: };

192: // try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
193: // keyword into account for these...
194: PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
195: {
196:   const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
197:   const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
198:   const Mat          baij       = b->AIJ;
199:   const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
200:   const PetscInt     m          = baij->rmap->n;
201:   const PetscInt     nz         = a->nz;
202:   const PetscInt    *idx        = a->j;
203:   const PetscInt    *ii         = a->i;
204:   const PetscScalar *v          = a->a;
205:   PetscInt           nonzerorow = 0;
206:   const PetscScalar *x;
207:   PetscScalar       *z;

209:   PetscFunctionBegin;
210:   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
211:   if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
212:   PetscCall(VecGetArrayRead(xx, &x));
213:   if (mult_add) {
214:     PetscCall(VecGetArray(zz, &z));
215:   } else {
216:     PetscCall(VecGetArrayWrite(zz, &z));
217:   }

219:   for (PetscInt i = 0; i < m; ++i) {
220:     PetscInt       jrow = ii[i];
221:     const PetscInt n    = ii[i + 1] - jrow;
222:     // leave a line so clang-format does not align these decls
223:     PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

225:     nonzerorow += n > 0;
226:     for (PetscInt j = 0; j < n; ++j, ++jrow) {
227:       const PetscScalar v_jrow     = v[jrow];
228:       const PetscInt    N_idx_jrow = N * idx[jrow];

230:       PETSC_PRAGMA_UNROLL
231:       for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
232:     }

234:     PETSC_PRAGMA_UNROLL
235:     for (int k = 0; k < N; ++k) {
236:       const PetscInt z_idx = N * i + k;

238:       if (mult_add) {
239:         z[z_idx] += sum[k];
240:       } else {
241:         z[z_idx] = sum[k];
242:       }
243:     }
244:   }
245:   PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
246:   PetscCall(VecRestoreArrayRead(xx, &x));
247:   if (mult_add) {
248:     PetscCall(VecRestoreArray(zz, &z));
249:   } else {
250:     PetscCall(VecRestoreArrayWrite(zz, &z));
251:   }
252:   PetscFunctionReturn(PETSC_SUCCESS);
253: }

255: PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
256: {
257:   const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
258:   const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
259:   const Mat          baij     = b->AIJ;
260:   const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
261:   const PetscInt     m        = baij->rmap->n;
262:   const PetscInt     nz       = a->nz;
263:   const PetscInt    *a_j      = a->j;
264:   const PetscInt    *a_i      = a->i;
265:   const PetscScalar *a_a      = a->a;
266:   const PetscScalar *x;
267:   PetscScalar       *z;

269:   PetscFunctionBegin;
270:   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
271:   if (mult_add) {
272:     if (yy != zz) PetscCall(VecCopy(yy, zz));
273:   } else {
274:     PetscCall(VecSet(zz, 0.0));
275:   }
276:   PetscCall(VecGetArrayRead(xx, &x));
277:   PetscCall(VecGetArray(zz, &z));

279:   for (PetscInt i = 0; i < m; i++) {
280:     const PetscInt     a_ii = a_i[i];
281:     const PetscInt    *idx  = PetscSafePointerPlusOffset(a_j, a_ii);
282:     const PetscScalar *v    = PetscSafePointerPlusOffset(a_a, a_ii);
283:     const PetscInt     n    = a_i[i + 1] - a_ii;
284:     PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

286:     PETSC_PRAGMA_UNROLL
287:     for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
288:     for (PetscInt j = 0; j < n; ++j) {
289:       const PetscInt    N_idx_j = N * idx[j];
290:       const PetscScalar v_j     = v[j];

292:       PETSC_PRAGMA_UNROLL
293:       for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
294:     }
295:   }

297:   PetscCall(PetscLogFlops(2 * N * nz));
298:   PetscCall(VecRestoreArrayRead(xx, &x));
299:   PetscCall(VecRestoreArray(zz, &z));
300:   PetscFunctionReturn(PETSC_SUCCESS);
301: }

303: #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
304:   static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
305:   { \
306:     PetscFunctionBegin; \
307:     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
308:     PetscFunctionReturn(PETSC_SUCCESS); \
309:   } \
310:   static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
311:   { \
312:     PetscFunctionBegin; \
313:     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
314:     PetscFunctionReturn(PETSC_SUCCESS); \
315:   } \
316:   static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
317:   { \
318:     PetscFunctionBegin; \
319:     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
320:     PetscFunctionReturn(PETSC_SUCCESS); \
321:   } \
322:   static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
323:   { \
324:     PetscFunctionBegin; \
325:     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
326:     PetscFunctionReturn(PETSC_SUCCESS); \
327:   }

329: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
330: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
331: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
332: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
333: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
334: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
335: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
336: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
337: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
338: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
339: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
340: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)

342: #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
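
/*
  Each instantiation above defines four block-size-specific kernels; for example,
  MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) produces MatMult_SeqMAIJ_2(),
  MatMultTranspose_SeqMAIJ_2(), MatMultAdd_SeqMAIJ_2() and MatMultTransposeAdd_SeqMAIJ_2().
  Block sizes without a specialization fall back to the generic *_SeqMAIJ_N() routines below.
*/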

344: static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
345: {
346:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
347:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
348:   const PetscScalar *x, *v;
349:   PetscScalar       *y, *sums;
350:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
351:   PetscInt           n, i, jrow, j, dof = b->dof, k;

353:   PetscFunctionBegin;
354:   PetscCall(VecGetArrayRead(xx, &x));
355:   PetscCall(VecSet(yy, 0.0));
356:   PetscCall(VecGetArray(yy, &y));
357:   idx = a->j;
358:   v   = a->a;
359:   ii  = a->i;

361:   for (i = 0; i < m; i++) {
362:     jrow = ii[i];
363:     n    = ii[i + 1] - jrow;
364:     sums = y + dof * i;
365:     for (j = 0; j < n; j++) {
366:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
367:       jrow++;
368:     }
369:   }

371:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
372:   PetscCall(VecRestoreArrayRead(xx, &x));
373:   PetscCall(VecRestoreArray(yy, &y));
374:   PetscFunctionReturn(PETSC_SUCCESS);
375: }

377: static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
378: {
379:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
380:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
381:   const PetscScalar *x, *v;
382:   PetscScalar       *y, *sums;
383:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
384:   PetscInt           n, i, jrow, j, dof = b->dof, k;

386:   PetscFunctionBegin;
387:   if (yy != zz) PetscCall(VecCopy(yy, zz));
388:   PetscCall(VecGetArrayRead(xx, &x));
389:   PetscCall(VecGetArray(zz, &y));
390:   idx = a->j;
391:   v   = a->a;
392:   ii  = a->i;

394:   for (i = 0; i < m; i++) {
395:     jrow = ii[i];
396:     n    = ii[i + 1] - jrow;
397:     sums = y + dof * i;
398:     for (j = 0; j < n; j++) {
399:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
400:       jrow++;
401:     }
402:   }

404:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
405:   PetscCall(VecRestoreArrayRead(xx, &x));
406:   PetscCall(VecRestoreArray(zz, &y));
407:   PetscFunctionReturn(PETSC_SUCCESS);
408: }

410: static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
411: {
412:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
413:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
414:   const PetscScalar *x, *v, *alpha;
415:   PetscScalar       *y;
416:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
417:   PetscInt           n, i, k;

419:   PetscFunctionBegin;
420:   PetscCall(VecGetArrayRead(xx, &x));
421:   PetscCall(VecSet(yy, 0.0));
422:   PetscCall(VecGetArray(yy, &y));
423:   for (i = 0; i < m; i++) {
424:     idx   = PetscSafePointerPlusOffset(a->j, a->i[i]);
425:     v     = PetscSafePointerPlusOffset(a->a, a->i[i]);
426:     n     = a->i[i + 1] - a->i[i];
427:     alpha = x + dof * i;
428:     while (n-- > 0) {
429:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
430:       idx++;
431:       v++;
432:     }
433:   }
434:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
435:   PetscCall(VecRestoreArrayRead(xx, &x));
436:   PetscCall(VecRestoreArray(yy, &y));
437:   PetscFunctionReturn(PETSC_SUCCESS);
438: }

440: static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
441: {
442:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
443:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
444:   const PetscScalar *x, *v, *alpha;
445:   PetscScalar       *y;
446:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
447:   PetscInt           n, i, k;

449:   PetscFunctionBegin;
450:   if (yy != zz) PetscCall(VecCopy(yy, zz));
451:   PetscCall(VecGetArrayRead(xx, &x));
452:   PetscCall(VecGetArray(zz, &y));
453:   for (i = 0; i < m; i++) {
454:     idx   = a->j + a->i[i];
455:     v     = a->a + a->i[i];
456:     n     = a->i[i + 1] - a->i[i];
457:     alpha = x + dof * i;
458:     while (n-- > 0) {
459:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
460:       idx++;
461:       v++;
462:     }
463:   }
464:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
465:   PetscCall(VecRestoreArrayRead(xx, &x));
466:   PetscCall(VecRestoreArray(zz, &y));
467:   PetscFunctionReturn(PETSC_SUCCESS);
468: }
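
/*
  The MPI kernels below overlap communication with computation: MatMult_MPIMAIJ_dof() and
  MatMultAdd_MPIMAIJ_dof() start the scatter of the needed off-process entries of xx into the
  local work vector b->w, apply the on-process (diagonal) part while the scatter proceeds, and
  add the off-diagonal contribution once the scatter completes.  The transpose variants apply
  both local parts first and then scatter-add the off-process contributions in reverse.
*/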

470: static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
471: {
472:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

474:   PetscFunctionBegin;
475:   /* start the scatter */
476:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
477:   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
478:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
479:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
480:   PetscFunctionReturn(PETSC_SUCCESS);
481: }

483: static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
484: {
485:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

487:   PetscFunctionBegin;
488:   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
489:   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
490:   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
491:   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
492:   PetscFunctionReturn(PETSC_SUCCESS);
493: }

495: static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
496: {
497:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

499:   PetscFunctionBegin;
500:   /* start the scatter */
501:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
502:   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
503:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
504:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
505:   PetscFunctionReturn(PETSC_SUCCESS);
506: }

508: static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
509: {
510:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

512:   PetscFunctionBegin;
513:   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
514:   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
515:   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
516:   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
517:   PetscFunctionReturn(PETSC_SUCCESS);
518: }

520: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
521: {
522:   Mat_Product *product = C->product;

524:   PetscFunctionBegin;
525:   if (product->type == MATPRODUCT_PtAP) {
526:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
527:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
528:   PetscFunctionReturn(PETSC_SUCCESS);
529: }

531: static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
532: {
533:   Mat_Product *product = C->product;
534:   PetscBool    flg     = PETSC_FALSE;
535:   Mat          A = product->A, P = product->B;
536:   PetscInt     alg = 1; /* set default algorithm */
537: #if !defined(PETSC_HAVE_HYPRE)
538:   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
539:   PetscInt    nalg        = 4;
540: #else
541:   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
542:   PetscInt    nalg        = 5;
543: #endif

545:   PetscFunctionBegin;
546:   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

548:   /* PtAP */
549:   /* Check matrix local sizes */
550:   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
551:              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
552:   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
553:              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

555:   /* Set the default algorithm */
556:   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
557:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

559:   /* Get runtime option */
560:   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
561:   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
562:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
563:   PetscOptionsEnd();

565:   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
566:   if (flg) {
567:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
568:     PetscFunctionReturn(PETSC_SUCCESS);
569:   }

571:   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
572:   if (flg) {
573:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
574:     PetscFunctionReturn(PETSC_SUCCESS);
575:   }

577:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
578:   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
579:   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
580:   PetscCall(MatProductSetFromOptions(C));
581:   PetscFunctionReturn(PETSC_SUCCESS);
582: }
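
/*
  In the PtAP routines below P is a MAIJ matrix, so only its compressed (per-node) AIJ matrix is
  stored.  Row i of the expanded P corresponds to row i/dof of the compressed matrix with
  component shift i%dof, and compressed column j expands to the dof columns j*dof, ..., j*dof+dof-1;
  for example, with dof = 2, compressed column 3 expands to expanded columns 6 and 7.
*/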

584: static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
585: {
586:   /* This routine requires testing -- first draft only */
587:   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
588:   Mat              P  = pp->AIJ;
589:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
590:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
591:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
592:   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
593:   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
594:   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
595:   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
596:   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
597:   MatScalar       *ca = c->a, *caj, *apa;

599:   PetscFunctionBegin;
600:   /* Allocate temporary array for storage of one row of A*P */
601:   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

603:   /* Clear old values in C */
604:   PetscCall(PetscArrayzero(ca, ci[cm]));

606:   for (i = 0; i < am; i++) {
607:     /* Form sparse row of A*P */
608:     anzi  = ai[i + 1] - ai[i];
609:     apnzj = 0;
610:     for (j = 0; j < anzi; j++) {
611:       /* Get offset within block of P */
612:       pshift = *aj % ppdof;
613:       /* Get block row of P */
614:       prow = *aj++ / ppdof; /* integer division */
615:       pnzj = pi[prow + 1] - pi[prow];
616:       pjj  = pj + pi[prow];
617:       paj  = pa + pi[prow];
618:       for (k = 0; k < pnzj; k++) {
619:         poffset = pjj[k] * ppdof + pshift;
620:         if (!apjdense[poffset]) {
621:           apjdense[poffset] = -1;
622:           apj[apnzj++]      = poffset;
623:         }
624:         apa[poffset] += (*aa) * paj[k];
625:       }
626:       PetscCall(PetscLogFlops(2.0 * pnzj));
627:       aa++;
628:     }

630:     /* Sort the j index array for quick sparse axpy. */
631:     /* Note: the value array (apa) does not need sorting since it is stored in dense locations. */
632:     PetscCall(PetscSortInt(apnzj, apj));

634:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
635:     prow    = i / ppdof; /* integer division */
636:     pshift  = i % ppdof;
637:     poffset = pi[prow];
638:     pnzi    = pi[prow + 1] - poffset;
639:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
640:     pJ = pj + poffset;
641:     pA = pa + poffset;
642:     for (j = 0; j < pnzi; j++) {
643:       crow = (*pJ) * ppdof + pshift;
644:       cjj  = cj + ci[crow];
645:       caj  = ca + ci[crow];
646:       pJ++;
647:       /* Perform sparse axpy operation.  Note cjj includes apj. */
648:       for (k = 0, nextap = 0; nextap < apnzj; k++) {
649:         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
650:       }
651:       PetscCall(PetscLogFlops(2.0 * apnzj));
652:       pA++;
653:     }

655:     /* Zero the current row info for A*P */
656:     for (j = 0; j < apnzj; j++) {
657:       apa[apj[j]]      = 0.;
658:       apjdense[apj[j]] = 0;
659:     }
660:   }

662:   /* Assemble the final matrix and clean up */
663:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
664:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
665:   PetscCall(PetscFree3(apa, apj, apjdense));
666:   PetscFunctionReturn(PETSC_SUCCESS);
667: }

669: static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
670: {
671:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
672:   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
673:   Mat                P  = pp->AIJ;
674:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
675:   PetscInt          *pti, *ptj, *ptJ;
676:   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
677:   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
678:   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
679:   MatScalar         *ca;
680:   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

682:   PetscFunctionBegin;
683:   /* Get ij structure of P^T */
684:   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

686:   cn = pn * ppdof;
687:   /* Allocate ci array, arrays for fill computation and */
688:   /* free space for accumulating nonzero column info */
689:   PetscCall(PetscMalloc1(cn + 1, &ci));
690:   ci[0] = 0;

692:   /* Work arrays for rows of P^T*A */
693:   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
694:   PetscCall(PetscArrayzero(ptadenserow, an));
695:   PetscCall(PetscArrayzero(denserow, cn));

697:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
698:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
699:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
700:   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
701:   current_space = free_space;

703:   /* Determine symbolic info for each row of C: */
704:   for (i = 0; i < pn; i++) {
705:     ptnzi = pti[i + 1] - pti[i];
706:     ptJ   = ptj + pti[i];
707:     for (dof = 0; dof < ppdof; dof++) {
708:       ptanzi = 0;
709:       /* Determine symbolic row of PtA: */
710:       for (j = 0; j < ptnzi; j++) {
711:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
712:         arow = ptJ[j] * ppdof + dof;
713:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
714:         anzj = ai[arow + 1] - ai[arow];
715:         ajj  = aj + ai[arow];
716:         for (k = 0; k < anzj; k++) {
717:           if (!ptadenserow[ajj[k]]) {
718:             ptadenserow[ajj[k]]    = -1;
719:             ptasparserow[ptanzi++] = ajj[k];
720:           }
721:         }
722:       }
723:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
724:       ptaj = ptasparserow;
725:       cnzi = 0;
726:       for (j = 0; j < ptanzi; j++) {
727:         /* Get offset within block of P */
728:         pshift = *ptaj % ppdof;
729:         /* Get block row of P */
730:         prow = (*ptaj++) / ppdof; /* integer division */
731:         /* P has same number of nonzeros per row as the compressed form */
732:         pnzj = pi[prow + 1] - pi[prow];
733:         pjj  = pj + pi[prow];
734:         for (k = 0; k < pnzj; k++) {
735:           /* Locations in C are shifted by the offset within the block */
736:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
737:           if (!denserow[pjj[k] * ppdof + pshift]) {
738:             denserow[pjj[k] * ppdof + pshift] = -1;
739:             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
740:           }
741:         }
742:       }

744:       /* sort sparserow */
745:       PetscCall(PetscSortInt(cnzi, sparserow));

747:       /* If free space is not available, make more free space */
748:       /* Double the amount of total space in the list */
749:       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

751:       /* Copy data into free space, and zero out denserows */
752:       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

754:       current_space->array += cnzi;
755:       current_space->local_used += cnzi;
756:       current_space->local_remaining -= cnzi;

758:       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
759:       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

761:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
762:       /*        For now, we will recompute what is needed. */
763:       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
764:     }
765:   }
766:   /* nnz is now stored in ci[cn], column indices are in the list of free space */
767:   /* Allocate space for cj, initialize cj, and */
768:   /* destroy list of free space and other temporary array(s) */
769:   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
770:   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
771:   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

773:   /* Allocate space for ca */
774:   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

776:   /* put together the new matrix */
777:   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
778:   PetscCall(MatSetBlockSize(C, pp->dof));

780:   /* MatCreateSeqAIJWithArrays() flags the matrix so PETSc doesn't free the user's arrays. */
781:   /* Since these are PETSc arrays, change the flags so they are freed as necessary. */
782:   c          = (Mat_SeqAIJ *)C->data;
783:   c->free_a  = PETSC_TRUE;
784:   c->free_ij = PETSC_TRUE;
785:   c->nonew   = 0;

787:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
788:   C->ops->productnumeric = MatProductNumeric_PtAP;

790:   /* Clean up. */
791:   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
796: {
797:   Mat_Product *product = C->product;
798:   Mat          A = product->A, P = product->B;

800:   PetscFunctionBegin;
801:   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
802:   PetscFunctionReturn(PETSC_SUCCESS);
803: }

805: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

807: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
808: {
809:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

811:   PetscFunctionBegin;
812:   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
813:   PetscFunctionReturn(PETSC_SUCCESS);
814: }

816: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

818: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
819: {
820:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

822:   PetscFunctionBegin;
823:   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
824:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
825:   PetscFunctionReturn(PETSC_SUCCESS);
826: }

828: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

830: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
831: {
832:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

834:   PetscFunctionBegin;
835:   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
836:   PetscFunctionReturn(PETSC_SUCCESS);
837: }

839: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

841: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
842: {
843:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

845:   PetscFunctionBegin;
846:   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
847:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
848:   PetscFunctionReturn(PETSC_SUCCESS);
849: }

851: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
852: {
853:   Mat_Product *product = C->product;
854:   Mat          A = product->A, P = product->B;
855:   PetscBool    flg;

857:   PetscFunctionBegin;
858:   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
859:   if (flg) {
860:     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
861:     C->ops->productnumeric = MatProductNumeric_PtAP;
862:     PetscFunctionReturn(PETSC_SUCCESS);
863:   }

865:   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
866:   if (flg) {
867:     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
868:     C->ops->productnumeric = MatProductNumeric_PtAP;
869:     PetscFunctionReturn(PETSC_SUCCESS);
870:   }

872:   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
873: }

875: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
876: {
877:   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
878:   Mat          a   = b->AIJ, B;
879:   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
880:   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
881:   PetscInt    *cols;
882:   PetscScalar *vals;

884:   PetscFunctionBegin;
885:   PetscCall(MatGetSize(a, &m, &n));
886:   PetscCall(PetscMalloc1(dof * m, &ilen));
887:   for (i = 0; i < m; i++) {
888:     nmax = PetscMax(nmax, aij->ilen[i]);
889:     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
890:   }
891:   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
892:   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
893:   PetscCall(MatSetType(B, newtype));
894:   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
895:   PetscCall(PetscFree(ilen));
896:   PetscCall(PetscMalloc1(nmax, &icols));
897:   ii = 0;
898:   for (i = 0; i < m; i++) {
899:     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
900:     for (j = 0; j < dof; j++) {
901:       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
902:       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
903:       ii++;
904:     }
905:     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
906:   }
907:   PetscCall(PetscFree(icols));
908:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
909:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

911:   if (reuse == MAT_INPLACE_MATRIX) {
912:     PetscCall(MatHeaderReplace(A, &B));
913:   } else {
914:     *newmat = B;
915:   }
916:   PetscFunctionReturn(PETSC_SUCCESS);
917: }

919: #include <../src/mat/impls/aij/mpi/mpiaij.h>

921: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
922: {
923:   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
924:   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
925:   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
926:   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
927:   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
928:   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
929:   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
930:   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
931:   PetscInt     rstart, cstart, *garray, ii, k;
932:   PetscScalar *vals, *ovals;

934:   PetscFunctionBegin;
935:   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
936:   for (i = 0; i < A->rmap->n / dof; i++) {
937:     nmax  = PetscMax(nmax, AIJ->ilen[i]);
938:     onmax = PetscMax(onmax, OAIJ->ilen[i]);
939:     for (j = 0; j < dof; j++) {
940:       dnz[dof * i + j] = AIJ->ilen[i];
941:       onz[dof * i + j] = OAIJ->ilen[i];
942:     }
943:   }
944:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
945:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
946:   PetscCall(MatSetType(B, newtype));
947:   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
948:   PetscCall(MatSetBlockSize(B, dof));
949:   PetscCall(PetscFree2(dnz, onz));

951:   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
952:   rstart = dof * maij->A->rmap->rstart;
953:   cstart = dof * maij->A->cmap->rstart;
954:   garray = mpiaij->garray;

956:   ii = rstart;
957:   for (i = 0; i < A->rmap->n / dof; i++) {
958:     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
959:     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
960:     for (j = 0; j < dof; j++) {
961:       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
962:       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
963:       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
964:       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
965:       ii++;
966:     }
967:     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
968:     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
969:   }
970:   PetscCall(PetscFree2(icols, oicols));

972:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
973:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

975:   if (reuse == MAT_INPLACE_MATRIX) {
976:     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
977:     ((PetscObject)A)->refct = 1;

979:     PetscCall(MatHeaderReplace(A, &B));

981:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
982:   } else {
983:     *newmat = B;
984:   }
985:   PetscFunctionReturn(PETSC_SUCCESS);
986: }

988: static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
989: {
990:   Mat A;

992:   PetscFunctionBegin;
993:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
994:   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
995:   PetscCall(MatDestroy(&A));
996:   PetscFunctionReturn(PETSC_SUCCESS);
997: }

999: static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1000: {
1001:   Mat A;

1003:   PetscFunctionBegin;
1004:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1005:   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1006:   PetscCall(MatDestroy(&A));
1007:   PetscFunctionReturn(PETSC_SUCCESS);
1008: }

1010: /*@
1011:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1012:   operations for multicomponent problems.  It interpolates each component the same
1013:   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1014:   and `MATMPIAIJ` for distributed matrices.

1016:   Collective

1018:   Input Parameters:
1019: + A   - the `MATAIJ` matrix describing the action on blocks
1020: - dof - the block size (number of components per node)

1022:   Output Parameter:
1023: . maij - the new `MATMAIJ` matrix

1025:   Level: advanced
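
  Example Usage:
  A minimal sketch; `Aij` is assumed to be an assembled `MATAIJ` interpolation matrix acting on a
  single component, and `xcoarse`/`xfine` are hypothetical vectors with 3 interlaced components per node:
.vb
  Mat maij;

  PetscCall(MatCreateMAIJ(Aij, 3, &maij));
  PetscCall(MatMult(maij, xcoarse, xfine)); // interpolates all 3 components in one call
  PetscCall(MatDestroy(&maij));
.ve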

1027: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1028: @*/
1029: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1030: {
1031:   PetscInt  n;
1032:   Mat       B;
1033:   PetscBool flg;
1034: #if defined(PETSC_HAVE_CUDA)
1035:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1036:   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1037: #endif

1039:   PetscFunctionBegin;
1040:   dof = PetscAbs(dof);
1041:   PetscCall(PetscObjectReference((PetscObject)A));

1043:   if (dof == 1) *maij = A;
1044:   else {
1045:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1046:     /* propagate vec type */
1047:     PetscCall(MatSetVecType(B, A->defaultvectype));
1048:     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1049:     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1050:     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1051:     PetscCall(PetscLayoutSetUp(B->rmap));
1052:     PetscCall(PetscLayoutSetUp(B->cmap));

1054:     B->assembled = PETSC_TRUE;

1056:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1057:     if (flg) {
1058:       Mat_SeqMAIJ *b;

1060:       PetscCall(MatSetType(B, MATSEQMAIJ));

1062:       B->ops->setup   = NULL;
1063:       B->ops->destroy = MatDestroy_SeqMAIJ;
1064:       B->ops->view    = MatView_SeqMAIJ;

1066:       b      = (Mat_SeqMAIJ *)B->data;
1067:       b->dof = dof;
1068:       b->AIJ = A;

1070:       if (dof == 2) {
1071:         B->ops->mult             = MatMult_SeqMAIJ_2;
1072:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1073:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1074:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1075:       } else if (dof == 3) {
1076:         B->ops->mult             = MatMult_SeqMAIJ_3;
1077:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1078:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1079:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1080:       } else if (dof == 4) {
1081:         B->ops->mult             = MatMult_SeqMAIJ_4;
1082:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1083:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1084:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1085:       } else if (dof == 5) {
1086:         B->ops->mult             = MatMult_SeqMAIJ_5;
1087:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1088:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1089:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1090:       } else if (dof == 6) {
1091:         B->ops->mult             = MatMult_SeqMAIJ_6;
1092:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1093:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1094:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1095:       } else if (dof == 7) {
1096:         B->ops->mult             = MatMult_SeqMAIJ_7;
1097:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
1098:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
1099:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1100:       } else if (dof == 8) {
1101:         B->ops->mult             = MatMult_SeqMAIJ_8;
1102:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1103:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1104:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1105:       } else if (dof == 9) {
1106:         B->ops->mult             = MatMult_SeqMAIJ_9;
1107:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1108:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1109:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1110:       } else if (dof == 10) {
1111:         B->ops->mult             = MatMult_SeqMAIJ_10;
1112:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
1113:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
1114:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1115:       } else if (dof == 11) {
1116:         B->ops->mult             = MatMult_SeqMAIJ_11;
1117:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
1118:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
1119:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1120:       } else if (dof == 16) {
1121:         B->ops->mult             = MatMult_SeqMAIJ_16;
1122:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1123:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1124:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1125:       } else if (dof == 18) {
1126:         B->ops->mult             = MatMult_SeqMAIJ_18;
1127:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
1128:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
1129:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1130:       } else {
1131:         B->ops->mult             = MatMult_SeqMAIJ_N;
1132:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
1133:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
1134:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1135:       }
1136: #if defined(PETSC_HAVE_CUDA)
1137:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1138: #endif
1139:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1140:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1141:     } else {
1142:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
1143:       Mat_MPIMAIJ *b;
1144:       IS           from, to;
1145:       Vec          gvec;

1147:       PetscCall(MatSetType(B, MATMPIMAIJ));

1149:       B->ops->setup   = NULL;
1150:       B->ops->destroy = MatDestroy_MPIMAIJ;
1151:       B->ops->view    = MatView_MPIMAIJ;

1153:       b      = (Mat_MPIMAIJ *)B->data;
1154:       b->dof = dof;
1155:       b->A   = A;

1157:       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1158:       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

1160:       PetscCall(VecGetSize(mpiaij->lvec, &n));
1161:       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1162:       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1163:       PetscCall(VecSetBlockSize(b->w, dof));
1164:       PetscCall(VecSetType(b->w, VECSEQ));

1166:       /* create two temporary index sets used to build the scatter-gather context */
1167:       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1168:       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

1170:       /* create temporary global vector to generate scatter context */
1171:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

1173:       /* generate the scatter context */
1174:       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

1176:       PetscCall(ISDestroy(&from));
1177:       PetscCall(ISDestroy(&to));
1178:       PetscCall(VecDestroy(&gvec));

1180:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1181:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1182:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1183:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

1185: #if defined(PETSC_HAVE_CUDA)
1186:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1187: #endif
1188:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1189:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1190:     }
1191:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
1192:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1193:     PetscCall(MatSetUp(B));
1194: #if defined(PETSC_HAVE_CUDA)
1195:     /* temporary until we have CUDA implementation of MAIJ */
1196:     {
1197:       PetscBool flg;
1198:       if (convert) {
1199:         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1200:         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1201:       }
1202:     }
1203: #endif
1204:     *maij = B;
1205:   }
1206:   PetscFunctionReturn(PETSC_SUCCESS);
1207: }