Actual source code: maij.c
1: #include <../src/mat/impls/maij/maij.h>
2: #include <../src/mat/utils/freespace.h>
4: /*@
5: MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7: Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9: Input Parameter:
10: . A - the `MATMAIJ` matrix
12: Output Parameter:
13: . B - the `MATAIJ` matrix
15: Level: advanced
17: Note:
18: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
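   Example Usage:
   A minimal sketch (hypothetical objects; assumes `mat` was created with `MatCreateMAIJ()`):
.vb
   Mat aij;
   PetscCall(MatMAIJGetAIJ(mat, &aij));
   /* use aij here, but do not MatDestroy() it -- the reference is borrowed */
.ve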
20: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
21: @*/
22: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
23: {
24: PetscBool ismpimaij, isseqmaij;
26: PetscFunctionBegin;
27: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
28: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29: if (ismpimaij) {
30: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32: *B = b->A;
33: } else if (isseqmaij) {
34: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36: *B = b->AIJ;
37: } else {
38: *B = A;
39: }
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: /*@
44: MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46: Logically Collective
48: Input Parameters:
49: + A - the `MATMAIJ` matrix
50: - dof - the block size for the new matrix
52: Output Parameter:
53: . B - the new `MATMAIJ` matrix
55: Level: advanced
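   Example Usage:
   A minimal sketch (hypothetical objects) reusing the blockwise action of `A` for 3 components:
.vb
   Mat B3;
   PetscCall(MatMAIJRedimension(A, 3, &B3));
   /* B3 applies the same underlying AIJ action, now to 3 components per node */
   PetscCall(MatDestroy(&B3));
.ve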
57: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
58: @*/
59: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60: {
61: Mat Aij = NULL;
63: PetscFunctionBegin;
65: PetscCall(MatMAIJGetAIJ(A, &Aij));
66: PetscCall(MatCreateMAIJ(Aij, dof, B));
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
71: {
72: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74: PetscFunctionBegin;
75: PetscCall(MatDestroy(&b->AIJ));
76: PetscCall(PetscFree(A->data));
77: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
78: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
79: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: static PetscErrorCode MatSetUp_MAIJ(Mat A)
84: {
85: PetscFunctionBegin;
86: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
87: }
89: static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
90: {
91: Mat B;
93: PetscFunctionBegin;
94: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
95: PetscCall(MatView(B, viewer));
96: PetscCall(MatDestroy(&B));
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
100: static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101: {
102: Mat B;
104: PetscFunctionBegin;
105: PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
106: PetscCall(MatView(B, viewer));
107: PetscCall(MatDestroy(&B));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
112: {
113: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115: PetscFunctionBegin;
116: PetscCall(MatDestroy(&b->AIJ));
117: PetscCall(MatDestroy(&b->OAIJ));
118: PetscCall(MatDestroy(&b->A));
119: PetscCall(VecScatterDestroy(&b->ctx));
120: PetscCall(VecDestroy(&b->w));
121: PetscCall(PetscFree(A->data));
122: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
123: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
124: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
125: PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
126: PetscFunctionReturn(PETSC_SUCCESS);
127: }
129: /*MC
130: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
131: multicomponent problems, interpolating or restricting each component the same way independently.
132: The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134: Operations provided:
135: .vb
136: MatMult()
137: MatMultTranspose()
138: MatMultAdd()
139: MatMultTransposeAdd()
140: .ve
142: Level: advanced
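   Example Usage:
   A sketch with hypothetical objects; a `MATMAIJ` is used like any other matrix for the operations listed above:
.vb
   Mat P2;
   PetscCall(MatCreateMAIJ(P, 2, &P2));  /* P interpolates one component */
   PetscCall(MatMult(P2, xc, xf));       /* interpolate both components  */
   PetscCall(MatDestroy(&P2));
.ve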
144: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
145: M*/
147: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148: {
149: Mat_MPIMAIJ *b;
150: PetscMPIInt size;
152: PetscFunctionBegin;
153: PetscCall(PetscNew(&b));
154: A->data = (void *)b;
156: PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158: A->ops->setup = MatSetUp_MAIJ;
160: b->AIJ = NULL;
161: b->dof = 0;
162: b->OAIJ = NULL;
163: b->ctx = NULL;
164: b->w = NULL;
165: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
166: if (size == 1) PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
167: else PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
168: A->preallocated = PETSC_TRUE;
169: A->assembled = PETSC_TRUE;
170: PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: #if PetscHasAttribute(always_inline)
174: #define PETSC_FORCE_INLINE __attribute__((always_inline))
175: #else
176: #define PETSC_FORCE_INLINE
177: #endif
179: #if defined(__clang__)
180: #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
181: #else
182: #define PETSC_PRAGMA_UNROLL
183: #endif
185: enum {
186: MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
187: };
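// Largest block size for which specialized kernels are instantiated below; MatCreateMAIJ()
// falls back to the generic MatMult_SeqMAIJ_N() family for larger sizes and for sizes
// (such as 12-15 and 17) that are not instantiated.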
189: // Try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
190: // keyword into account for these.
191: PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
192: {
193: const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
194: const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
195: const Mat baij = b->AIJ;
196: const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
197: const PetscInt m = baij->rmap->n;
198: const PetscInt nz = a->nz;
199: const PetscInt *idx = a->j;
200: const PetscInt *ii = a->i;
201: const PetscScalar *v = a->a;
202: PetscInt nonzerorow = 0;
203: const PetscScalar *x;
204: PetscScalar *z;
206: PetscFunctionBegin;
207: PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
208: if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
209: PetscCall(VecGetArrayRead(xx, &x));
210: if (mult_add) {
211: PetscCall(VecGetArray(zz, &z));
212: } else {
213: PetscCall(VecGetArrayWrite(zz, &z));
214: }
216: for (PetscInt i = 0; i < m; ++i) {
217: PetscInt jrow = ii[i];
218: const PetscInt n = ii[i + 1] - jrow;
219: // leave a line so clang-format does not align these decls
220: PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
222: nonzerorow += n > 0;
223: for (PetscInt j = 0; j < n; ++j, ++jrow) {
224: const PetscScalar v_jrow = v[jrow];
225: const PetscInt N_idx_jrow = N * idx[jrow];
227: PETSC_PRAGMA_UNROLL
228: for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
229: }
231: PETSC_PRAGMA_UNROLL
232: for (int k = 0; k < N; ++k) {
233: const PetscInt z_idx = N * i + k;
235: if (mult_add) {
236: z[z_idx] += sum[k];
237: } else {
238: z[z_idx] = sum[k];
239: }
240: }
241: }
242: PetscCall(PetscLogFlops(2.0 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
243: PetscCall(VecRestoreArrayRead(xx, &x));
244: if (mult_add) {
245: PetscCall(VecRestoreArray(zz, &z));
246: } else {
247: PetscCall(VecRestoreArrayWrite(zz, &z));
248: }
249: PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
253: {
254: const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
255: const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
256: const Mat baij = b->AIJ;
257: const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
258: const PetscInt m = baij->rmap->n;
259: const PetscInt nz = a->nz;
260: const PetscInt *a_j = a->j;
261: const PetscInt *a_i = a->i;
262: const PetscScalar *a_a = a->a;
263: const PetscScalar *x;
264: PetscScalar *z;
266: PetscFunctionBegin;
267: PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
268: if (mult_add) {
269: if (yy != zz) PetscCall(VecCopy(yy, zz));
270: } else {
271: PetscCall(VecSet(zz, 0.0));
272: }
273: PetscCall(VecGetArrayRead(xx, &x));
274: PetscCall(VecGetArray(zz, &z));
276: for (PetscInt i = 0; i < m; i++) {
277: const PetscInt a_ii = a_i[i];
278: const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii);
279: const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii);
280: const PetscInt n = a_i[i + 1] - a_ii;
281: PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
283: PETSC_PRAGMA_UNROLL
284: for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
285: for (PetscInt j = 0; j < n; ++j) {
286: const PetscInt N_idx_j = N * idx[j];
287: const PetscScalar v_j = v[j];
289: PETSC_PRAGMA_UNROLL
290: for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
291: }
292: }
294: PetscCall(PetscLogFlops(2.0 * N * nz));
295: PetscCall(VecRestoreArrayRead(xx, &x));
296: PetscCall(VecRestoreArray(zz, &z));
297: PetscFunctionReturn(PETSC_SUCCESS);
298: }
300: #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
301: static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
302: { \
303: PetscFunctionBegin; \
304: PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
305: PetscFunctionReturn(PETSC_SUCCESS); \
306: } \
307: static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
308: { \
309: PetscFunctionBegin; \
310: PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
311: PetscFunctionReturn(PETSC_SUCCESS); \
312: } \
313: static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
314: { \
315: PetscFunctionBegin; \
316: PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
317: PetscFunctionReturn(PETSC_SUCCESS); \
318: } \
319: static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
320: { \
321: PetscFunctionBegin; \
322: PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
323: PetscFunctionReturn(PETSC_SUCCESS); \
324: }
326: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
327: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
328: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
329: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
330: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
331: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
332: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
333: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
334: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
335: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
336: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
337: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
339: #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
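/*
   Each instantiation above expands to four thin wrappers, e.g. for N = 2:
   MatMult_SeqMAIJ_2(), MatMultTranspose_SeqMAIJ_2(), MatMultAdd_SeqMAIJ_2() and
   MatMultTransposeAdd_SeqMAIJ_2(), each forwarding to the force-inlined template
   with a compile-time N so that the inner loops over k can be fully unrolled.
*/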
341: static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
342: {
343: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
344: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
345: const PetscScalar *x, *v;
346: PetscScalar *y, *sums;
347: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
348: PetscInt n, i, jrow, j, dof = b->dof, k;
350: PetscFunctionBegin;
351: PetscCall(VecGetArrayRead(xx, &x));
352: PetscCall(VecSet(yy, 0.0));
353: PetscCall(VecGetArray(yy, &y));
354: idx = a->j;
355: v = a->a;
356: ii = a->i;
358: for (i = 0; i < m; i++) {
359: jrow = ii[i];
360: n = ii[i + 1] - jrow;
361: sums = y + dof * i;
362: for (j = 0; j < n; j++) {
363: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
364: jrow++;
365: }
366: }
368: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
369: PetscCall(VecRestoreArrayRead(xx, &x));
370: PetscCall(VecRestoreArray(yy, &y));
371: PetscFunctionReturn(PETSC_SUCCESS);
372: }
374: static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
375: {
376: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
377: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
378: const PetscScalar *x, *v;
379: PetscScalar *y, *sums;
380: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
381: PetscInt n, i, jrow, j, dof = b->dof, k;
383: PetscFunctionBegin;
384: if (yy != zz) PetscCall(VecCopy(yy, zz));
385: PetscCall(VecGetArrayRead(xx, &x));
386: PetscCall(VecGetArray(zz, &y));
387: idx = a->j;
388: v = a->a;
389: ii = a->i;
391: for (i = 0; i < m; i++) {
392: jrow = ii[i];
393: n = ii[i + 1] - jrow;
394: sums = y + dof * i;
395: for (j = 0; j < n; j++) {
396: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
397: jrow++;
398: }
399: }
401: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
402: PetscCall(VecRestoreArrayRead(xx, &x));
403: PetscCall(VecRestoreArray(zz, &y));
404: PetscFunctionReturn(PETSC_SUCCESS);
405: }
407: static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
408: {
409: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
410: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
411: const PetscScalar *x, *v, *alpha;
412: PetscScalar *y;
413: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
414: PetscInt n, i, k;
416: PetscFunctionBegin;
417: PetscCall(VecGetArrayRead(xx, &x));
418: PetscCall(VecSet(yy, 0.0));
419: PetscCall(VecGetArray(yy, &y));
420: for (i = 0; i < m; i++) {
421: idx = PetscSafePointerPlusOffset(a->j, a->i[i]);
422: v = PetscSafePointerPlusOffset(a->a, a->i[i]);
423: n = a->i[i + 1] - a->i[i];
424: alpha = x + dof * i;
425: while (n-- > 0) {
426: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
427: idx++;
428: v++;
429: }
430: }
431: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
432: PetscCall(VecRestoreArrayRead(xx, &x));
433: PetscCall(VecRestoreArray(yy, &y));
434: PetscFunctionReturn(PETSC_SUCCESS);
435: }
437: static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
438: {
439: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
440: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
441: const PetscScalar *x, *v, *alpha;
442: PetscScalar *y;
443: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
444: PetscInt n, i, k;
446: PetscFunctionBegin;
447: if (yy != zz) PetscCall(VecCopy(yy, zz));
448: PetscCall(VecGetArrayRead(xx, &x));
449: PetscCall(VecGetArray(zz, &y));
450: for (i = 0; i < m; i++) {
451: idx = PetscSafePointerPlusOffset(a->j, a->i[i]);
452: v = PetscSafePointerPlusOffset(a->a, a->i[i]);
453: n = a->i[i + 1] - a->i[i];
454: alpha = x + dof * i;
455: while (n-- > 0) {
456: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
457: idx++;
458: v++;
459: }
460: }
461: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
462: PetscCall(VecRestoreArrayRead(xx, &x));
463: PetscCall(VecRestoreArray(zz, &y));
464: PetscFunctionReturn(PETSC_SUCCESS);
465: }
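/*
   The parallel (MPIMAIJ) kernels below overlap communication with computation: the
   scatter of the needed off-process entries of xx into the work vector b->w is
   started, the local diagonal block b->AIJ is applied while the messages are in
   flight, and the off-diagonal block b->OAIJ is applied once the scatter completes.
*/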
467: static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
468: {
469: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
471: PetscFunctionBegin;
472: /* start the scatter */
473: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
474: PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
475: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
476: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
477: PetscFunctionReturn(PETSC_SUCCESS);
478: }
480: static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
481: {
482: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
484: PetscFunctionBegin;
485: PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
486: PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
487: PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
488: PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
489: PetscFunctionReturn(PETSC_SUCCESS);
490: }
492: static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
493: {
494: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
496: PetscFunctionBegin;
497: /* start the scatter */
498: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
499: PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
500: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
501: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
502: PetscFunctionReturn(PETSC_SUCCESS);
503: }
505: static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
506: {
507: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
509: PetscFunctionBegin;
510: PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
511: PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
512: PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
513: PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
514: PetscFunctionReturn(PETSC_SUCCESS);
515: }
517: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
518: {
519: Mat_Product *product = C->product;
521: PetscFunctionBegin;
522: PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
523: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
524: PetscFunctionReturn(PETSC_SUCCESS);
525: }
527: static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
528: {
529: Mat_Product *product = C->product;
530: PetscBool flg = PETSC_FALSE;
531: Mat A = product->A, P = product->B;
532: PetscInt alg = 1; /* set default algorithm */
533: #if !defined(PETSC_HAVE_HYPRE)
534: const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
535: PetscInt nalg = 4;
536: #else
537: const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
538: PetscInt nalg = 5;
539: #endif
541: PetscFunctionBegin;
542: PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
544: /* PtAP */
545: /* Check matrix local sizes */
546: PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
547: A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
548: PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
549: A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
551: /* Set the default algorithm */
552: PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
553: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
555: /* Get runtime option */
556: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
557: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
558: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
559: PetscOptionsEnd();
561: PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
562: if (flg) {
563: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
568: if (flg) {
569: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
570: PetscFunctionReturn(PETSC_SUCCESS);
571: }
573: /* Convert P from a MAIJ to an AIJ matrix since no MAIJ implementation of this product is available */
574: PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since no MAIJ implementation is available\n"));
575: PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
576: PetscCall(MatProductSetFromOptions(C));
577: PetscFunctionReturn(PETSC_SUCCESS);
578: }
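/*
   Usage sketch: the algorithm selected above can be overridden at runtime with
     -mat_product_algorithm <scalable|nonscalable|allatonce|allatonce_merged>
   ("hypre" is also available when PETSc is configured with hypre). Only the two
   all-at-once variants have a native MAIJ implementation; the others convert P
   to MPIAIJ first.
*/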
580: static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
581: {
582: /* This routine requires testing -- first draft only */
583: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
584: Mat P = pp->AIJ;
585: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
586: Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data;
587: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
588: const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
589: const PetscInt *ci = c->i, *cj = c->j, *cjj;
590: const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
591: PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
592: const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
593: MatScalar *ca = c->a, *caj, *apa;
595: PetscFunctionBegin;
596: /* Allocate temporary array for storage of one row of A*P */
597: PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
599: /* Clear old values in C */
600: PetscCall(PetscArrayzero(ca, ci[cm]));
602: for (i = 0; i < am; i++) {
603: /* Form sparse row of A*P */
604: anzi = ai[i + 1] - ai[i];
605: apnzj = 0;
606: for (j = 0; j < anzi; j++) {
607: /* Get offset within block of P */
608: pshift = *aj % ppdof;
609: /* Get block row of P */
610: prow = *aj++ / ppdof; /* integer division */
611: pnzj = pi[prow + 1] - pi[prow];
612: pjj = pj + pi[prow];
613: paj = pa + pi[prow];
614: for (k = 0; k < pnzj; k++) {
615: poffset = pjj[k] * ppdof + pshift;
616: if (!apjdense[poffset]) {
617: apjdense[poffset] = -1;
618: apj[apnzj++] = poffset;
619: }
620: apa[poffset] += (*aa) * paj[k];
621: }
622: PetscCall(PetscLogFlops(2.0 * pnzj));
623: aa++;
624: }
626: /* Sort the j index array for quick sparse axpy. */
627: /* Note: the 'a' array does not need sorting as it is in dense storage locations. */
628: PetscCall(PetscSortInt(apnzj, apj));
630: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
631: prow = i / ppdof; /* integer division */
632: pshift = i % ppdof;
633: poffset = pi[prow];
634: pnzi = pi[prow + 1] - poffset;
635: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
636: pJ = pj + poffset;
637: pA = pa + poffset;
638: for (j = 0; j < pnzi; j++) {
639: crow = (*pJ) * ppdof + pshift;
640: cjj = cj + ci[crow];
641: caj = ca + ci[crow];
642: pJ++;
643: /* Perform sparse axpy operation. Note cjj includes apj. */
644: for (k = 0, nextap = 0; nextap < apnzj; k++) {
645: if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
646: }
647: PetscCall(PetscLogFlops(2.0 * apnzj));
648: pA++;
649: }
651: /* Zero the current row info for A*P */
652: for (j = 0; j < apnzj; j++) {
653: apa[apj[j]] = 0.;
654: apjdense[apj[j]] = 0;
655: }
656: }
658: /* Assemble the final matrix and clean up */
659: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
660: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
661: PetscCall(PetscFree3(apa, apj, apjdense));
662: PetscFunctionReturn(PETSC_SUCCESS);
663: }
665: static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
666: {
667: PetscFreeSpaceList free_space = NULL, current_space = NULL;
668: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
669: Mat P = pp->AIJ;
670: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
671: PetscInt *pti, *ptj, *ptJ;
672: PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
673: const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
674: PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
675: MatScalar *ca;
676: const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
678: PetscFunctionBegin;
679: /* Get ij structure of P^T */
680: PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
682: cn = pn * ppdof;
683: /* Allocate ci array, arrays for fill computation and */
684: /* free space for accumulating nonzero column info */
685: PetscCall(PetscMalloc1(cn + 1, &ci));
686: ci[0] = 0;
688: /* Work arrays for rows of P^T*A */
689: PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
690: PetscCall(PetscArrayzero(ptadenserow, an));
691: PetscCall(PetscArrayzero(denserow, cn));
693: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
694: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
695: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
696: PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
697: current_space = free_space;
699: /* Determine symbolic info for each row of C: */
700: for (i = 0; i < pn; i++) {
701: ptnzi = pti[i + 1] - pti[i];
702: ptJ = ptj + pti[i];
703: for (dof = 0; dof < ppdof; dof++) {
704: ptanzi = 0;
705: /* Determine symbolic row of PtA: */
706: for (j = 0; j < ptnzi; j++) {
707: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
708: arow = ptJ[j] * ppdof + dof;
709: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
710: anzj = ai[arow + 1] - ai[arow];
711: ajj = aj + ai[arow];
712: for (k = 0; k < anzj; k++) {
713: if (!ptadenserow[ajj[k]]) {
714: ptadenserow[ajj[k]] = -1;
715: ptasparserow[ptanzi++] = ajj[k];
716: }
717: }
718: }
719: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
720: ptaj = ptasparserow;
721: cnzi = 0;
722: for (j = 0; j < ptanzi; j++) {
723: /* Get offset within block of P */
724: pshift = *ptaj % ppdof;
725: /* Get block row of P */
726: prow = (*ptaj++) / ppdof; /* integer division */
727: /* P has same number of nonzeros per row as the compressed form */
728: pnzj = pi[prow + 1] - pi[prow];
729: pjj = pj + pi[prow];
730: for (k = 0; k < pnzj; k++) {
731: /* Locations in C are shifted by the offset within the block */
732: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
733: if (!denserow[pjj[k] * ppdof + pshift]) {
734: denserow[pjj[k] * ppdof + pshift] = -1;
735: sparserow[cnzi++] = pjj[k] * ppdof + pshift;
736: }
737: }
738: }
740: /* sort sparserow */
741: PetscCall(PetscSortInt(cnzi, sparserow));
743: /* If free space is not available, make more free space */
744: /* Double the amount of total space in the list */
745: if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
747: /* Copy data into free space, and zero out denserows */
748: PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
750: current_space->array += cnzi;
751: current_space->local_used += cnzi;
752: current_space->local_remaining -= cnzi;
754: for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
755: for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
757: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
758: /* For now, we will recompute what is needed. */
759: ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
760: }
761: }
762: /* nnz is now stored in ci[cn], column indices are in the list of free space */
763: /* Allocate space for cj, initialize cj, and */
764: /* destroy list of free space and other temporary array(s) */
765: PetscCall(PetscMalloc1(ci[cn], &cj));
766: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
767: PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
769: /* Allocate space for ca */
770: PetscCall(PetscCalloc1(ci[cn], &ca));
772: /* put together the new matrix */
773: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
774: PetscCall(MatSetBlockSize(C, pp->dof));
776: /* MatCreateSeqAIJWithArrays() flags the matrix so PETSc doesn't free the user's arrays. */
777: /* Since these are PETSc arrays, change flags to free them as necessary. */
778: c = (Mat_SeqAIJ *)C->data;
779: c->free_a = PETSC_TRUE;
780: c->free_ij = PETSC_TRUE;
781: c->nonew = 0;
783: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
784: C->ops->productnumeric = MatProductNumeric_PtAP;
786: /* Clean up. */
787: PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
788: PetscFunctionReturn(PETSC_SUCCESS);
789: }
791: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
792: {
793: Mat_Product *product = C->product;
794: Mat A = product->A, P = product->B;
796: PetscFunctionBegin;
797: PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
798: PetscFunctionReturn(PETSC_SUCCESS);
799: }
801: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
803: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
804: {
805: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
807: PetscFunctionBegin;
808: PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
812: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
814: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
815: {
816: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
818: PetscFunctionBegin;
819: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
820: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
821: PetscFunctionReturn(PETSC_SUCCESS);
822: }
824: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
826: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
827: {
828: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
830: PetscFunctionBegin;
831: PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
832: PetscFunctionReturn(PETSC_SUCCESS);
833: }
835: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
837: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
838: {
839: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
841: PetscFunctionBegin;
842: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
843: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
844: PetscFunctionReturn(PETSC_SUCCESS);
845: }
847: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
848: {
849: Mat_Product *product = C->product;
850: Mat A = product->A, P = product->B;
851: PetscBool flg;
853: PetscFunctionBegin;
854: PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
855: if (flg) {
856: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
857: C->ops->productnumeric = MatProductNumeric_PtAP;
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
862: PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
863: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
864: C->ops->productnumeric = MatProductNumeric_PtAP;
865: PetscFunctionReturn(PETSC_SUCCESS);
866: }
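/*
   Usage sketch (hypothetical objects): the symbolic routines above are reached
   through the generic MatProduct interface, e.g.

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/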
868: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
869: {
870: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
871: Mat a = b->AIJ, B;
872: Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data;
873: PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
874: PetscInt *cols;
875: PetscScalar *vals;
877: PetscFunctionBegin;
878: PetscCall(MatGetSize(a, &m, &n));
879: PetscCall(PetscMalloc1(dof * m, &ilen));
880: for (i = 0; i < m; i++) {
881: nmax = PetscMax(nmax, aij->ilen[i]);
882: for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
883: }
884: PetscCall(MatCreate(PETSC_COMM_SELF, &B));
885: PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
886: PetscCall(MatSetType(B, newtype));
887: PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
888: PetscCall(PetscFree(ilen));
889: PetscCall(PetscMalloc1(nmax, &icols));
890: ii = 0;
891: for (i = 0; i < m; i++) {
892: PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
893: for (j = 0; j < dof; j++) {
894: for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
895: PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
896: ii++;
897: }
898: PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
899: }
900: PetscCall(PetscFree(icols));
901: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
902: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
904: if (reuse == MAT_INPLACE_MATRIX) {
905: PetscCall(MatHeaderReplace(A, &B));
906: } else {
907: *newmat = B;
908: }
909: PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: #include <../src/mat/impls/aij/mpi/mpiaij.h>
914: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
915: {
916: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data;
917: Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
918: Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
919: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data;
920: Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data;
921: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data;
922: PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
923: PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
924: PetscInt rstart, cstart, *garray, ii, k;
925: PetscScalar *vals, *ovals;
927: PetscFunctionBegin;
928: PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
929: for (i = 0; i < A->rmap->n / dof; i++) {
930: nmax = PetscMax(nmax, AIJ->ilen[i]);
931: onmax = PetscMax(onmax, OAIJ->ilen[i]);
932: for (j = 0; j < dof; j++) {
933: dnz[dof * i + j] = AIJ->ilen[i];
934: onz[dof * i + j] = OAIJ->ilen[i];
935: }
936: }
937: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
938: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
939: PetscCall(MatSetType(B, newtype));
940: PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
941: PetscCall(MatSetBlockSize(B, dof));
942: PetscCall(PetscFree2(dnz, onz));
944: PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
945: rstart = dof * maij->A->rmap->rstart;
946: cstart = dof * maij->A->cmap->rstart;
947: garray = mpiaij->garray;
949: ii = rstart;
950: for (i = 0; i < A->rmap->n / dof; i++) {
951: PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
952: PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
953: for (j = 0; j < dof; j++) {
954: for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
955: for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
956: PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
957: PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
958: ii++;
959: }
960: PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
961: PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
962: }
963: PetscCall(PetscFree2(icols, oicols));
965: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
966: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
968: if (reuse == MAT_INPLACE_MATRIX) {
969: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
970: ((PetscObject)A)->refct = 1;
972: PetscCall(MatHeaderReplace(A, &B));
974: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
975: } else {
976: *newmat = B;
977: }
978: PetscFunctionReturn(PETSC_SUCCESS);
979: }
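/*
   Usage sketch (hypothetical objects): the converters above are composed as
   MatConvert() implementations, so callers simply write

     Mat Aaij;
     PetscCall(MatConvert(maij, MATAIJ, MAT_INITIAL_MATRIX, &Aaij));
*/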
981: static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
982: {
983: Mat A;
985: PetscFunctionBegin;
986: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
987: PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
988: PetscCall(MatDestroy(&A));
989: PetscFunctionReturn(PETSC_SUCCESS);
990: }
992: static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
993: {
994: Mat A;
996: PetscFunctionBegin;
997: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
998: PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
999: PetscCall(MatDestroy(&A));
1000: PetscFunctionReturn(PETSC_SUCCESS);
1001: }
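/*
   Both submatrix fallbacks above first materialize a full AIJ copy of the MAIJ
   matrix, so they are correct but not memory-optimal.
*/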
1003: /*@
1004: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1005: operations for multicomponent problems. It interpolates each component the same
1006: way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
1007: and `MATMPIAIJ` for distributed matrices.
1009: Collective
1011: Input Parameters:
1012: + A - the `MATAIJ` matrix describing the action on blocks
1013: - dof - the block size (number of components per node)
1015: Output Parameter:
1016: . maij - the new `MATMAIJ` matrix
1018: Level: advanced
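   Example Usage:
   A minimal sketch (hypothetical objects) building a 4-component interpolation from a scalar interpolation `P`:
.vb
   Mat P4;
   PetscCall(MatCreateMAIJ(P, 4, &P4));
   PetscCall(MatMult(P4, coarse, fine));          /* interpolate all 4 components */
   PetscCall(MatMultTranspose(P4, fine, coarse)); /* restrict all 4 components */
   PetscCall(MatDestroy(&P4));
.ve

   Note:
   If `dof` is 1, `A` itself is returned as `*maij` with its reference count increased.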
1020: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1021: @*/
1022: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1023: {
1024: PetscInt n;
1025: Mat B;
1026: PetscBool flg;
1027: #if defined(PETSC_HAVE_CUDA)
1028: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1029: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1030: #endif
1032: PetscFunctionBegin;
1033: dof = PetscAbs(dof);
1034: PetscCall(PetscObjectReference((PetscObject)A));
1036: if (dof == 1) *maij = A;
1037: else {
1038: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1039: /* propagate vec type */
1040: PetscCall(MatSetVecType(B, A->defaultvectype));
1041: PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1042: PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1043: PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1044: PetscCall(PetscLayoutSetUp(B->rmap));
1045: PetscCall(PetscLayoutSetUp(B->cmap));
1047: B->assembled = PETSC_TRUE;
1049: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1050: if (flg) {
1051: Mat_SeqMAIJ *b;
1053: PetscCall(MatSetType(B, MATSEQMAIJ));
1055: B->ops->setup = NULL;
1056: B->ops->destroy = MatDestroy_SeqMAIJ;
1057: B->ops->view = MatView_SeqMAIJ;
1059: b = (Mat_SeqMAIJ *)B->data;
1060: b->dof = dof;
1061: b->AIJ = A;
1063: if (dof == 2) {
1064: B->ops->mult = MatMult_SeqMAIJ_2;
1065: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
1066: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
1067: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1068: } else if (dof == 3) {
1069: B->ops->mult = MatMult_SeqMAIJ_3;
1070: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
1071: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
1072: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1073: } else if (dof == 4) {
1074: B->ops->mult = MatMult_SeqMAIJ_4;
1075: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
1076: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
1077: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1078: } else if (dof == 5) {
1079: B->ops->mult = MatMult_SeqMAIJ_5;
1080: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1081: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1082: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1083: } else if (dof == 6) {
1084: B->ops->mult = MatMult_SeqMAIJ_6;
1085: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
1086: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
1087: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1088: } else if (dof == 7) {
1089: B->ops->mult = MatMult_SeqMAIJ_7;
1090: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
1091: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
1092: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1093: } else if (dof == 8) {
1094: B->ops->mult = MatMult_SeqMAIJ_8;
1095: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
1096: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
1097: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1098: } else if (dof == 9) {
1099: B->ops->mult = MatMult_SeqMAIJ_9;
1100: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
1101: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
1102: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1103: } else if (dof == 10) {
1104: B->ops->mult = MatMult_SeqMAIJ_10;
1105: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
1106: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
1107: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1108: } else if (dof == 11) {
1109: B->ops->mult = MatMult_SeqMAIJ_11;
1110: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
1111: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
1112: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1113: } else if (dof == 16) {
1114: B->ops->mult = MatMult_SeqMAIJ_16;
1115: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
1116: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
1117: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1118: } else if (dof == 18) {
1119: B->ops->mult = MatMult_SeqMAIJ_18;
1120: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
1121: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
1122: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1123: } else {
1124: B->ops->mult = MatMult_SeqMAIJ_N;
1125: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
1126: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
1127: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1128: }
1129: #if defined(PETSC_HAVE_CUDA)
1130: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1131: #endif
1132: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1133: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1134: } else {
1135: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1136: Mat_MPIMAIJ *b;
1137: IS from, to;
1138: Vec gvec;
1140: PetscCall(MatSetType(B, MATMPIMAIJ));
1142: B->ops->setup = NULL;
1143: B->ops->destroy = MatDestroy_MPIMAIJ;
1144: B->ops->view = MatView_MPIMAIJ;
1146: b = (Mat_MPIMAIJ *)B->data;
1147: b->dof = dof;
1148: b->A = A;
1150: PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1151: PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
1153: PetscCall(VecGetSize(mpiaij->lvec, &n));
1154: PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1155: PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1156: PetscCall(VecSetBlockSize(b->w, dof));
1157: PetscCall(VecSetType(b->w, VECSEQ));
1159: /* create two temporary index sets used to build the scatter context */
1160: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1161: PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1163: /* create temporary global vector to generate scatter context */
1164: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1166: /* generate the scatter context */
1167: PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1169: PetscCall(ISDestroy(&from));
1170: PetscCall(ISDestroy(&to));
1171: PetscCall(VecDestroy(&gvec));
1173: B->ops->mult = MatMult_MPIMAIJ_dof;
1174: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
1175: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
1176: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1178: #if defined(PETSC_HAVE_CUDA)
1179: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1180: #endif
1181: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1182: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1183: }
1184: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
1185: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1186: PetscCall(MatSetUp(B));
1187: #if defined(PETSC_HAVE_CUDA)
1188: /* temporary until we have a CUDA implementation of MAIJ */
1189: {
1190: PetscBool flg;
1191: if (convert) {
1192: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1193: if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1194: }
1195: }
1196: #endif
1197: *maij = B;
1198: }
1199: PetscFunctionReturn(PETSC_SUCCESS);
1200: }