/* Source file: mpidense.c */
1: /*
2: Basic functions for basic parallel dense matrices.
3: Portions of this code are under:
4: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
10: #include <petsc/private/vecimpl.h>
11: #include <petsc/private/deviceimpl.h>
12: #include <petsc/private/sfimpl.h>
14: /*@
15: MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16: matrix that represents the operator. For sequential matrices it returns itself.
18: Input Parameter:
19: . A - the sequential or MPI `MATDENSE` matrix
21: Output Parameter:
22: . B - the inner matrix
24: Level: intermediate
26: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27: @*/
28: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29: {
30: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31: PetscBool flg;
33: PetscFunctionBegin;
35: PetscAssertPointer(B, 2);
36: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37: if (flg) *B = mat->A;
38: else {
39: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41: *B = A;
42: }
43: PetscFunctionReturn(PETSC_SUCCESS);
44: }
46: static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47: {
48: Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49: Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
51: PetscFunctionBegin;
52: PetscCall(MatCopy(Amat->A, Bmat->A, s));
53: PetscFunctionReturn(PETSC_SUCCESS);
54: }
56: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57: {
58: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59: PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60: PetscScalar *v;
62: PetscFunctionBegin;
63: PetscCall(MatDenseGetArray(mat->A, &v));
64: PetscCall(MatDenseGetLDA(mat->A, &lda));
65: rend2 = PetscMin(rend, A->cmap->N);
66: if (rend2 > rstart) {
67: for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68: PetscCall(PetscLogFlops(rend2 - rstart));
69: }
70: PetscCall(MatDenseRestoreArray(mat->A, &v));
71: PetscFunctionReturn(PETSC_SUCCESS);
72: }
74: static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75: {
76: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
79: PetscFunctionBegin;
80: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81: lrow = row - rstart;
82: PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83: PetscFunctionReturn(PETSC_SUCCESS);
84: }
86: static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87: {
88: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
91: PetscFunctionBegin;
92: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93: lrow = row - rstart;
94: PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95: PetscFunctionReturn(PETSC_SUCCESS);
96: }
/* Return (and cache on A) a sequential dense matrix that aliases the storage of
   A's diagonal block. Only matrices with congruent row/column layouts are supported. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* reuse the diagonal block if one was already created and composed on A */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases A's local storage starting at column rstart
       (NOTE(review): the offset m*rstart assumes the inner matrix has lda == m — confirm) */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* cache B on A; the composition holds a reference, so the MatDestroy() below
       only drops this function's reference and B stays alive with A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Insert/add values into the matrix. Locally owned rows are written straight into
   the sequential block; off-process rows are queued in the stash and communicated
   during MatAssemblyBegin/End. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it; mark the matrix unassembled until assembly completes */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
166: static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167: {
168: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
171: PetscFunctionBegin;
172: for (i = 0; i < m; i++) {
173: if (idxm[i] < 0) continue; /* negative row */
174: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175: PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
176: row = idxm[i] - rstart;
177: for (j = 0; j < n; j++) {
178: if (idxn[j] < 0) continue; /* negative column */
179: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180: PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181: }
182: }
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
187: {
188: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
190: PetscFunctionBegin;
191: PetscCall(MatDenseGetLDA(a->A, lda));
192: PetscFunctionReturn(PETSC_SUCCESS);
193: }
/* Set the leading dimension of the local dense storage. The inner sequential
   matrix is created lazily here if it does not exist yet, so the LDA can be
   fixed before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the inner matrix holds the local rows and ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    /* match the device flavor (CUDA/HIP) of the outer matrix, if any */
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
222: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
223: {
224: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226: PetscFunctionBegin;
227: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
228: PetscCall(MatDenseGetArray(a->A, array));
229: PetscFunctionReturn(PETSC_SUCCESS);
230: }
232: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
233: {
234: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236: PetscFunctionBegin;
237: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
238: PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
243: {
244: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246: PetscFunctionBegin;
247: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
248: PetscCall(MatDenseGetArrayWrite(a->A, array));
249: PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
253: {
254: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
256: PetscFunctionBegin;
257: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
258: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
259: PetscCall(MatDensePlaceArray(a->A, array));
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
264: {
265: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
267: PetscFunctionBegin;
268: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
269: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
270: PetscCall(MatDenseResetArray(a->A));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
275: {
276: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
278: PetscFunctionBegin;
279: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
280: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
281: PetscCall(MatDenseReplaceArray(a->A, array));
282: PetscFunctionReturn(PETSC_SUCCESS);
283: }
/* Extract the submatrix A(isrow, iscol). The column index set is gathered so
   every rank has all requested columns; rows must already be distributed the
   same way as A (no parallel redistribution is performed). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full list of requested columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */
  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is written densely, assuming the new matrix has lda == nrows;
     bv is advanced past the data and restored with the advanced pointer — confirm
     MatDenseRestoreArray tolerates this. irow[] entries are assumed locally owned. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
352: static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
353: {
354: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
356: PetscFunctionBegin;
357: PetscCall(MatDenseRestoreArray(a->A, array));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
362: {
363: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
365: PetscFunctionBegin;
366: PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
367: PetscFunctionReturn(PETSC_SUCCESS);
368: }
370: static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
371: {
372: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
374: PetscFunctionBegin;
375: PetscCall(MatDenseRestoreArrayWrite(a->A, array));
376: PetscFunctionReturn(PETSC_SUCCESS);
377: }
379: static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
380: {
381: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
382: PetscInt nstash, reallocs;
384: PetscFunctionBegin;
385: if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
387: PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
388: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
389: PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
/* Finish assembly: receive the stashed off-process entries, insert them row by
   row, then assemble the inner sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
427: static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
428: {
429: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
431: PetscFunctionBegin;
432: PetscCall(MatZeroEntries(l->A));
433: PetscFunctionReturn(PETSC_SUCCESS);
434: }
/* Zero the given (global) rows; optionally put `diag` on the diagonal of the
   zeroed rows and adjust b so that b[r] = diag*x[r] for each zeroed row r.
   The step order matters: b is fixed first, the rows are zeroed with a zero
   diagonal, and the diagonal is inserted afterwards via MatDiagonalSet. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows with no diagonal; the diagonal is set collectively below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
468: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
470: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
471: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
473: static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
474: {
475: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
476: const PetscScalar *ax;
477: PetscScalar *ay;
478: PetscMemType axmtype, aymtype;
480: PetscFunctionBegin;
481: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
482: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
483: PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
484: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
485: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
486: PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
487: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
488: PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
489: PetscFunctionReturn(PETSC_SUCCESS);
490: }
492: static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
493: {
494: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
495: const PetscScalar *ax;
496: PetscScalar *ay;
497: PetscMemType axmtype, aymtype;
499: PetscFunctionBegin;
500: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
501: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
502: PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
503: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
504: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
505: PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
506: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
507: PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
508: PetscFunctionReturn(PETSC_SUCCESS);
509: }
511: static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
512: {
513: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
514: const PetscScalar *ax;
515: PetscScalar *ay;
516: PetscMemType axmtype, aymtype;
518: PetscFunctionBegin;
519: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
520: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
521: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
522: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
523: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
524: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
525: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
526: PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
531: {
532: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
533: const PetscScalar *ax;
534: PetscScalar *ay;
535: PetscMemType axmtype, aymtype;
537: PetscFunctionBegin;
538: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
539: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
540: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
541: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
542: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
543: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
544: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
545: PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
546: PetscFunctionReturn(PETSC_SUCCESS);
547: }
/* yy[c_start:c_end) = A[:, c_start:c_end)^H * xx: apply the restricted local
   Hermitian-transpose product into the work vector, then sum contributions
   across ranks with an SF reduce. Only the locally owned slice of the target
   range in yy is zeroed first (on host or device memory as appropriate). */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* the work vector accumulates only entries in [c_start, c_end); clear it fully */
  PetscCall(VecZeroEntries(a->lvec));
  /* intersect the requested column range with this rank's owned range of yy */
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  c_start_local = PetscMax(c_start, r_start);
  c_end_local = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      /* device memory: register the full local array then zero just the subrange */
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  /* sum the per-rank partial results into yy */
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
582: static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
583: {
584: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
585: const PetscScalar *ax;
586: PetscScalar *ay;
587: PetscMemType axmtype, aymtype;
589: PetscFunctionBegin;
590: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
591: PetscCall(VecSet(yy, 0.0));
592: if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
593: else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
594: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
595: PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
596: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
597: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
598: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
599: PetscCall(VecRestoreArrayAndMemType(yy, &ay));
600: PetscFunctionReturn(PETSC_SUCCESS);
601: }
603: static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
604: {
605: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
606: const PetscScalar *ax;
607: PetscScalar *ay;
608: PetscMemType axmtype, aymtype;
610: PetscFunctionBegin;
611: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
612: PetscCall(VecCopy(yy, zz));
613: PetscCall(VecZeroEntries(a->lvec));
614: PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
615: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
616: PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
617: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
618: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
619: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
620: PetscCall(VecRestoreArrayAndMemType(zz, &ay));
621: PetscFunctionReturn(PETSC_SUCCESS);
622: }
624: static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
625: {
626: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
627: const PetscScalar *ax;
628: PetscScalar *ay;
629: PetscMemType axmtype, aymtype;
631: PetscFunctionBegin;
632: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
633: PetscCall(VecCopy(yy, zz));
634: if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
635: else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
636: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
637: PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
638: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
639: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
640: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
641: PetscCall(VecRestoreArrayAndMemType(zz, &ay));
642: PetscFunctionReturn(PETSC_SUCCESS);
643: }
/* yy = A^T xx (plain, non-Hermitian transpose) */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* zz = yy + A^T xx (plain, non-Hermitian transpose) */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* yy = A^H xx (conjugate transpose) */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* zz = yy + A^H xx (conjugate transpose) */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
673: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
674: {
675: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
676: PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd;
677: PetscScalar *x;
678: const PetscScalar *av;
680: PetscFunctionBegin;
681: PetscCall(VecGetArray(v, &x));
682: PetscCall(VecGetSize(v, &ng));
683: PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
684: PetscCall(VecGetLocalSize(v, &nl));
685: len = PetscMin(a->A->rmap->n, a->A->cmap->n);
686: radd = A->rmap->rstart * m;
687: PetscCall(MatDenseGetArrayRead(a->A, &av));
688: PetscCall(MatDenseGetLDA(a->A, &lda));
689: for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
690: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
691: if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
692: PetscCall(VecRestoreArray(v, &x));
693: PetscFunctionReturn(PETSC_SUCCESS);
694: }
696: static PetscErrorCode MatDestroy_MPIDense(Mat mat)
697: {
698: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
700: PetscFunctionBegin;
701: PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
702: PetscCall(MatStashDestroy_Private(&mat->stash));
703: PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
704: PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
705: PetscCall(MatDestroy(&mdn->A));
706: PetscCall(VecDestroy(&mdn->lvec));
707: PetscCall(PetscSFDestroy(&mdn->Mvctx));
708: PetscCall(VecDestroy(&mdn->cvec));
709: PetscCall(MatDestroy(&mdn->cmat));
711: PetscCall(PetscFree(mat->data));
712: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
714: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
715: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
716: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
717: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
718: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
719: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
720: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
721: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
722: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
723: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
724: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
725: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
726: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
727: #if defined(PETSC_HAVE_ELEMENTAL)
728: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
729: #endif
730: #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
731: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
732: #endif
733: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
734: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
735: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
736: #if defined(PETSC_HAVE_CUDA)
737: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
738: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
739: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
740: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
741: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
742: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
743: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
744: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
745: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
746: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
747: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
748: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
749: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
750: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
751: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
752: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
753: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
754: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
755: #endif
756: #if defined(PETSC_HAVE_HIP)
757: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
758: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
759: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
760: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
761: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
762: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
763: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
764: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
765: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
766: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
767: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
768: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
769: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
770: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
771: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
772: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
773: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
774: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
775: #endif
776: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
777: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
778: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
779: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
780: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
781: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
782: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
783: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
784: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
785: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
786: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
787: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
788: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
789: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
791: PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
792: PetscFunctionReturn(PETSC_SUCCESS);
793: }
795: #include <petscdraw.h>
796: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
797: {
798: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
799: PetscMPIInt rank;
800: PetscViewerType vtype;
801: PetscBool isascii, isdraw;
802: PetscViewer sviewer;
803: PetscViewerFormat format;
805: PetscFunctionBegin;
806: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
807: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
808: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
809: if (isascii) {
810: PetscCall(PetscViewerGetType(viewer, &vtype));
811: PetscCall(PetscViewerGetFormat(viewer, &format));
812: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
813: MatInfo info;
814: PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
815: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
816: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
817: (PetscInt)info.memory));
818: PetscCall(PetscViewerFlush(viewer));
819: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
820: if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
821: PetscFunctionReturn(PETSC_SUCCESS);
822: } else if (format == PETSC_VIEWER_ASCII_INFO) {
823: PetscFunctionReturn(PETSC_SUCCESS);
824: }
825: } else if (isdraw) {
826: PetscDraw draw;
827: PetscBool isnull;
829: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
830: PetscCall(PetscDrawIsNull(draw, &isnull));
831: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
832: }
834: {
835: /* assemble the entire matrix onto first processor. */
836: Mat A;
837: PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
838: PetscInt *cols;
839: PetscScalar *vals;
841: PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
842: if (rank == 0) {
843: PetscCall(MatSetSizes(A, M, N, M, N));
844: } else {
845: PetscCall(MatSetSizes(A, 0, 0, M, N));
846: }
847: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
848: PetscCall(MatSetType(A, MATMPIDENSE));
849: PetscCall(MatMPIDenseSetPreallocation(A, NULL));
851: /* Copy the matrix ... This isn't the most efficient means,
852: but it's quick for now */
853: A->insertmode = INSERT_VALUES;
855: row = mat->rmap->rstart;
856: m = mdn->A->rmap->n;
857: for (i = 0; i < m; i++) {
858: PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
859: PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
860: PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
861: row++;
862: }
864: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
865: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
866: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
867: if (rank == 0) {
868: PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
869: PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
870: }
871: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
872: PetscCall(MatDestroy(&A));
873: }
874: PetscFunctionReturn(PETSC_SUCCESS);
875: }
877: static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
878: {
879: PetscBool isascii, isbinary, isdraw, issocket;
881: PetscFunctionBegin;
882: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
883: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
884: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
885: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
887: if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
888: else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
889: PetscFunctionReturn(PETSC_SUCCESS);
890: }
892: static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
893: {
894: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
895: Mat mdn = mat->A;
896: PetscLogDouble isend[5], irecv[5];
898: PetscFunctionBegin;
899: info->block_size = 1.0;
901: PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
903: isend[0] = info->nz_used;
904: isend[1] = info->nz_allocated;
905: isend[2] = info->nz_unneeded;
906: isend[3] = info->memory;
907: isend[4] = info->mallocs;
908: if (flag == MAT_LOCAL) {
909: info->nz_used = isend[0];
910: info->nz_allocated = isend[1];
911: info->nz_unneeded = isend[2];
912: info->memory = isend[3];
913: info->mallocs = isend[4];
914: } else if (flag == MAT_GLOBAL_MAX) {
915: PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
917: info->nz_used = irecv[0];
918: info->nz_allocated = irecv[1];
919: info->nz_unneeded = irecv[2];
920: info->memory = irecv[3];
921: info->mallocs = irecv[4];
922: } else if (flag == MAT_GLOBAL_SUM) {
923: PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
925: info->nz_used = irecv[0];
926: info->nz_allocated = irecv[1];
927: info->nz_unneeded = irecv[2];
928: info->memory = irecv[3];
929: info->mallocs = irecv[4];
930: }
931: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
932: info->fill_ratio_needed = 0;
933: info->factor_mallocs = 0;
934: PetscFunctionReturn(PETSC_SUCCESS);
935: }
/* Set an option on the parallel dense matrix; most options are forwarded to the
   local sequential block, a few are recorded on this level only */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded straight to the local block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remembered here too so MatSetValues on this level interprets input correctly */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    /* all remaining options are silently ignored */
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* A <- diag(ll) * A * diag(rr).  Left scaling touches only locally owned rows;
   right scaling first broadcasts rr into the local column space (lvec) through
   the Mvctx star forest, since the local block stores all global columns. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk across row i: column-major storage, so the stride is lda */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the communication pattern used to gather rr into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* scale column i in place */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Compute the Frobenius, 1-, or infinity norm of the parallel dense matrix.
   The 2-norm is not supported in parallel. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the local block is the whole matrix */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    /* NOTE(review): the FROBENIUS and NORM_1 loops below walk the raw array
       contiguously, which assumes the leading dimension equals the local row
       count -- confirm this holds for matrices with a larger LDA */
    if (type == NORM_FROBENIUS) {
      /* local sum of |a_ij|^2, then global sum and a final sqrt */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the globally summed column absolute sums */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the max over local results suffices */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Transpose: each process inserts its local rows as columns of the result via
   MatSetValues, then the result is assembled.  Handles initial, reuse, and
   in-place modes. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the target with transposed layout: local columns become local rows */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global indices of the locally owned rows */
  for (j = 0; j < n; j++) {
    /* the local block stores every global column, so j is a global row index of B;
       v points at the start of column j (column-major, stride lda) */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: adopt B's contents into A and dispose of the temporary header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1111: static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1112: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
/* Finalize layouts and ensure the matrix is preallocated (PETSc-managed storage
   by default) before use */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* only preallocate if the user has not already done so */
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1123: static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1124: {
1125: Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1127: PetscFunctionBegin;
1128: PetscCall(MatAXPY(A->A, alpha, B->A, str));
1129: PetscFunctionReturn(PETSC_SUCCESS);
1130: }
1132: static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1133: {
1134: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1136: PetscFunctionBegin;
1137: PetscCall(MatConjugate(a->A));
1138: PetscFunctionReturn(PETSC_SUCCESS);
1139: }
1141: static PetscErrorCode MatRealPart_MPIDense(Mat A)
1142: {
1143: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1145: PetscFunctionBegin;
1146: PetscCall(MatRealPart(a->A));
1147: PetscFunctionReturn(PETSC_SUCCESS);
1148: }
1150: static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1151: {
1152: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1154: PetscFunctionBegin;
1155: PetscCall(MatImaginaryPart(a->A));
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
1159: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1160: {
1161: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1163: PetscFunctionBegin;
1164: PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1165: PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1166: PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1167: PetscFunctionReturn(PETSC_SUCCESS);
1168: }
1170: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1172: static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1173: {
1174: PetscInt i, m, n;
1175: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1177: PetscFunctionBegin;
1178: PetscCall(MatGetSize(A, &m, &n));
1179: if (type == REDUCTION_MEAN_REALPART) {
1180: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1181: } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1182: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1183: } else {
1184: PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1185: }
1186: if (type == NORM_2) {
1187: for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1188: }
1189: if (type == NORM_INFINITY) {
1190: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1191: } else {
1192: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1193: }
1194: if (type == NORM_2) {
1195: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1196: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1197: for (i = 0; i < n; i++) reductions[i] /= m;
1198: }
1199: PetscFunctionReturn(PETSC_SUCCESS);
1200: }
1202: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1203: {
1204: Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1206: PetscFunctionBegin;
1207: PetscCall(MatSetRandom(d->A, rctx));
1208: #if defined(PETSC_HAVE_DEVICE)
1209: x->offloadmask = d->A->offloadmask;
1210: #endif
1211: PetscFunctionReturn(PETSC_SUCCESS);
1212: }
1214: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1215: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1216: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1217: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1218: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1219: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1220: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
/* Virtual function table for MATMPIDENSE.  Each entry fills a slot of struct _MatOps;
   the numeric comments give the slot index, and NULL marks operations that are either
   unsupported or provided elsewhere (e.g. composed at creation time). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/ NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*114*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatADot_Default,
                                       MatANorm_Default,
                                       NULL,
                                       NULL};
/* Allocate (or, when data is non-NULL, adopt) the storage of the inner sequential
   block.  The local block a->A is mat->rmap->n x mat->cmap->N: every process stores
   all columns of its locally owned rows. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* match the inner sequential type to the outer (host/CUDA/HIP) flavor */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* keep the outer matrix's offload state consistent with the inner block */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Convert parallel AIJ to parallel dense: merge each rank's rows into a sequential
   AIJ matrix, densify it, then concatenate the per-rank pieces into an MPI dense
   matrix.  Supports initial, reuse, and in-place conversion modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  /* C == NULL (initial/in-place) builds a fresh parallel matrix; otherwise refill *newmat */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Convert parallel dense to parallel AIJ: sparsify the local dense block, then
   concatenate the per-rank pieces into an MPI matrix.  Supports initial, reuse,
   and in-place conversion modes. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  /* C == NULL (initial/in-place) builds a fresh parallel matrix; otherwise refill *newmat */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert parallel dense to Elemental format by pushing each rank's local rows
   (all N columns) into the Elemental matrix with MatSetValues */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* entries are added below, so start from zero */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: all global columns, and the globally numbered local rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* strided storage: insert one column at a time to skip the padding */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1484: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1485: {
1486: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1488: PetscFunctionBegin;
1489: PetscCall(MatDenseGetColumn(mat->A, col, vals));
1490: PetscFunctionReturn(PETSC_SUCCESS);
1491: }
1493: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1494: {
1495: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1497: PetscFunctionBegin;
1498: PetscCall(MatDenseRestoreColumn(mat->A, vals));
1499: PetscFunctionReturn(PETSC_SUCCESS);
1500: }
1502: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1503: {
1504: Mat_MPIDense *mat;
1505: PetscInt m, nloc, N;
1507: PetscFunctionBegin;
1508: PetscCall(MatGetSize(inmat, &m, &N));
1509: PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1510: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1511: PetscInt sum;
1513: if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1514: /* Check sum(n) = N */
1515: PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1516: PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1518: PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1519: PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1520: }
1522: /* numeric phase */
1523: mat = (Mat_MPIDense *)(*outmat)->data;
1524: PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1525: PetscFunctionReturn(PETSC_SUCCESS);
1526: }
/* Expose column `col` as a Vec that shares the matrix storage (read/write access).
   Only one column vector may be outstanding at a time, tracked via a->vecinuse. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 0 means "not in use", so the stored value is col+1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the cached vector at the first entry of column col (column-major, stride lda) */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense(),
   returning the array to the local block and clearing the in-use marker */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL; /* caller's reference is invalidated */
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseGetColumnVecRead_MPIDense - Read-only variant of MatDenseGetColumnVec_MPIDense().

  Same mechanics as the read/write version, but uses MatDenseGetArrayRead() and pushes a
  read lock on the cached vector so accidental writes are caught.
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* column `col` starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the returned vector */
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseRestoreColumnVecRead_MPIDense - Release a read-only column vector obtained with
  MatDenseGetColumnVecRead_MPIDense(); pops the read lock and restores the array.
*/
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); /* pairs with MatDenseGetArrayRead */
  PetscCall(VecLockReadPop(a->cvec));                      /* undo the VecLockReadPush from the Get call */
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseGetColumnVecWrite_MPIDense - Write-only variant of MatDenseGetColumnVec_MPIDense().

  Uses MatDenseGetArrayWrite(), so existing values of the column need not be valid; the
  caller is expected to overwrite them before restoring.
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* column `col` starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseRestoreColumnVecWrite_MPIDense - Release a write-only column vector obtained
  with MatDenseGetColumnVecWrite_MPIDense().
*/
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); /* pairs with MatDenseGetArrayWrite */
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseGetSubMatrix_MPIDense - Return a no-copy view a->cmat of the global row range
  [rbegin, rend) and column range [cbegin, cend) of A.

  The view matrix is cached on A and reused across calls; its row/column layouts are only
  rebuilt when the requested ranges differ from the previous call (tracked in the
  sub_rbegin/sub_rend/sub_cbegin/sub_cend fields of the cached matrix).  Only one
  submatrix (and no column vector) may be outstanding at a time.
*/
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the requested global ranges to this rank's ownership, yielding local
     (process-relative) begin/end indices; empty intersection gives begin == end */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached view matrix and give it layouts matching the request;
       share A's layouts when the full row/column range is requested */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    c              = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin  = rbegin;
    c->sub_rend    = rend;
    c->sub_cbegin  = cbegin;
    c->sub_cend    = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    /* row range changed since the previous call: rebuild the cached row layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      /* column layout changed: rebuild it and drop the multiply scatter objects,
         which are column-layout dependent and will be recreated on demand */
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* rows are passed as local indices, columns as global indices; presumably the inner
     sequential matrix stores all global columns locally -- TODO confirm against
     MatDenseGetSubMatrix() on the sequential type */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask; /* view inherits host/device residency from the inner matrix */
#endif
  a->matinuse = cbegin + 1; /* cbegin+1 so that 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatDenseRestoreSubMatrix_MPIDense - Release the view obtained with
  MatDenseGetSubMatrix_MPIDense(): restores the inner sequential submatrix, clears the
  in-use flag, and nulls the caller's handle.  The cached view matrix itself is kept for
  reuse by the next Get call.
*/
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); /* pairs with the inner Get; resets c->A to NULL */
  *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask; /* the view may have changed host/device residency */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1718: /*MC
1719: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1721: Options Database Key:
1722: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1724: Level: beginner
1726: .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1727: M*/
/*
  MatCreate_MPIDense - Type constructor for MATMPIDENSE.

  Allocates the per-matrix data structure, installs the function table, creates the
  stash used for off-process MatSetValues(), and composes the type-specific
  implementations of the MatDense API plus the conversion/product hooks.  The string
  keys below are the dynamic-dispatch names looked up by PetscUseMethod()/
  PetscTryMethod() and must not be altered.
*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the shared virtual-function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* MatDense API implementations for this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product hooks (sparse-dense combinations share the MPIAIJ implementations) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1801: /*MC
1802: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1804: This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1805: and `MATMPIDENSE` otherwise.
1807: Options Database Key:
1808: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1810: Level: beginner
1812: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1813: M*/
1815: /*@
1816: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1818: Collective
1820: Input Parameters:
1821: + B - the matrix
1822: - data - optional location of matrix data. Set to `NULL` for PETSc
1823: to control all matrix memory allocation.
1825: Level: intermediate
1827: Notes:
1828: The dense format is fully compatible with standard Fortran
1829: storage by columns.
1831: The data input variable is intended primarily for Fortran programmers
1832: who wish to allocate their own matrix memory space. Most users should
1833: set `data` to `NULL`.
1835: .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1836: @*/
/* Dispatch to the type-specific implementation composed as "MatMPIDenseSetPreallocation_C";
   PetscTryMethod() makes this a no-op for matrix types that do not provide it. */
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1845: /*@
1846: MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1847: array provided by the user. This is useful to avoid copying an array
1848: into a matrix
1850: Not Collective
1852: Input Parameters:
1853: + mat - the matrix
1854: - array - the array in column major order
1856: Level: developer
1858: Note:
1859: Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1861: You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1862: freed when the matrix is destroyed.
1864: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1865: `MatDenseReplaceArray()`
1866: @*/
/* Dispatch to the type-specific "MatDensePlaceArray_C" implementation; PetscUseMethod()
   errors if the matrix type does not support it.  The object state is bumped because
   the matrix values are (potentially) changed by swapping in the new array. */
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* placed array is treated as host memory -- TODO confirm for device builds */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1879: /*@
1880: MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1882: Not Collective
1884: Input Parameter:
1885: . mat - the matrix
1887: Level: developer
1889: Note:
1890: You can only call this after a call to `MatDensePlaceArray()`
1892: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1893: @*/
/* Dispatch to the type-specific "MatDenseResetArray_C" implementation, restoring the
   array that was in place before MatDensePlaceArray(); bumps the object state since
   the visible values change back. */
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1903: /*@
1904: MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1905: array provided by the user. This is useful to avoid copying an array
1906: into a matrix
1908: Not Collective
1910: Input Parameters:
1911: + mat - the matrix
1912: - array - the array in column major order
1914: Level: developer
1916: Note:
1917: Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1919: The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1920: freed by the user. It will be freed when the matrix is destroyed.
1922: .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1923: @*/
/* Dispatch to the type-specific "MatDenseReplaceArray_C" implementation.  Unlike
   MatDensePlaceArray(), ownership of `array` transfers to the matrix (freed at destroy,
   per the docstring above); the state bump reflects the changed values. */
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* replacement array is treated as host memory -- TODO confirm for device builds */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1936: /*@
1937: MatCreateDense - Creates a matrix in `MATDENSE` format.
1939: Collective
1941: Input Parameters:
1942: + comm - MPI communicator
1943: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1944: . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1945: . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1946: . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1947: - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1948: to control all matrix memory allocation.
1950: Output Parameter:
1951: . A - the matrix
1953: Level: intermediate
1955: Notes:
1956: The dense format is fully compatible with standard Fortran
1957: storage by columns.
1959: Although local portions of the matrix are stored in column-major
1960: order, the matrix is partitioned across MPI ranks by row.
1962: The data input variable is intended primarily for Fortran programmers
1963: who wish to allocate their own matrix memory space. Most users should
1964: set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1966: The user MUST specify either the local or global matrix dimensions
1967: (possibly both).
1969: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1970: @*/
/* Convenience constructor: create, size, and type the matrix, then preallocate.
   Both preallocation calls are issued because MATDENSE resolves to either the
   sequential or the MPI type depending on the communicator size; each call is a
   no-op (PetscTryMethod dispatch) for the type that does not match. */
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1982: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1983: {
1984: Mat mat;
1985: Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1987: PetscFunctionBegin;
1988: *newmat = NULL;
1989: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1990: PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1991: PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1992: a = (Mat_MPIDense *)mat->data;
1994: mat->factortype = A->factortype;
1995: mat->assembled = PETSC_TRUE;
1996: mat->preallocated = PETSC_TRUE;
1998: mat->insertmode = NOT_SET_VALUES;
1999: a->donotstash = oldmat->donotstash;
2001: PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2002: PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2004: PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2006: *newmat = mat;
2007: PetscFunctionReturn(PETSC_SUCCESS);
2008: }
/*
  MatLoad_MPIDense - Load the matrix from a viewer; binary viewers are always supported,
  HDF5 viewers only when PETSc was configured with HDF5.  Any other viewer type errors.
*/
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2036: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2037: {
2038: Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2039: Mat a, b;
2041: PetscFunctionBegin;
2042: a = matA->A;
2043: b = matB->A;
2044: PetscCall(MatEqual(a, b, flag));
2045: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2046: PetscFunctionReturn(PETSC_SUCCESS);
2047: }
2049: static PetscErrorCode MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)
2050: {
2051: MatProductCtx_TransMatMultDense *atb = *(MatProductCtx_TransMatMultDense **)data;
2053: PetscFunctionBegin;
2054: PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2055: PetscCall(MatDestroy(&atb->atb));
2056: PetscCall(PetscFree(atb));
2057: PetscFunctionReturn(PETSC_SUCCESS);
2058: }
2060: static PetscErrorCode MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)
2061: {
2062: MatProductCtx_MatTransMultDense *abt = *(MatProductCtx_MatTransMultDense **)data;
2064: PetscFunctionBegin;
2065: PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2066: PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2067: PetscCall(PetscFree(abt));
2068: PetscFunctionReturn(PETSC_SUCCESS);
2069: }
/*
  MatTransposeMatMultNumeric_MPIDense_MPIDense - Numeric phase of C = A^T * B.

  Each rank first forms the full local contribution atb = Aloc^T * Bloc (size
  A->cmap->N x B->cmap->N).  These contributions are then summed across ranks and
  distributed by C's row ownership, either via an SF reduce to rank 0 (when rank 0 owns
  all rows of C) or via MPI_Reduce_scatter using the buffers set up in the symbolic
  phase.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_TransMatMultDense *atb;
  MPI_Comm                         comm;
  PetscMPIInt                      size, *recvcounts;
  PetscScalar                     *carray, *sendbuf;
  const PetscScalar               *atbarray;
  PetscInt                         i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (MatProductCtx_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  /* the cached atb->atb is reused across repeated numeric calls */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local)); /* destination of the sum must start at zero */
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* reduce whole rows at once: one contiguous datatype element per row of length B->cmap->N */
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN)); /* copy back from the lda-compacted temporary */
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf */
  /* pack per-destination-rank, column-major within each rank's row range, so that
     MPI_Reduce_scatter delivers each rank exactly its owned rows of C */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatTransposeMatMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A^T * B: sizes
  and types C, and allocates the send buffer and reduce-scatter receive counts reused by
  every numeric call.
*/
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm                         comm;
  PetscMPIInt                      size;
  PetscInt                         cm = A->cmap->n, cM, cN = B->cmap->N;
  MatProductCtx_TransMatMultDense *atb;
  PetscBool                        cisdense = PETSC_FALSE;
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the same row distribution */
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
             A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  /* NOTE(review): when both CUDA and HIP are enabled, the second TypeCompareAny
     overwrites the first's result, so a MATMPIDENSECUDA C would be re-typed below --
     confirm whether a combined check is intended */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN local contribution; recvcounts[i] is the number of
     entries (owned rows of C times cN columns) delivered to rank i by Reduce_scatter */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
  C->product->data    = atb;
  C->product->destroy = MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatMatTransposeMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A * B^T: sizes
  and types C and allocates the communication buffers for the chosen algorithm
  ("allgatherv" gathers all of B on every rank; "cyclic" passes row blocks of B around
  the ring with two ping-pong buffers).
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm                         comm;
  PetscMPIInt                      i, size;
  PetscInt                         maxRows, bufsiz;
  PetscMPIInt                      tag;
  PetscInt                         alg;
  MatProductCtx_MatTransMultDense *abt;
  Mat_Product                     *product = C->product;
  PetscBool                        flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  /* A*B^T requires A and B to agree on the (local) column dimension */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); /* private tag for the cyclic point-to-point traffic */

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two buffers, each large enough for the biggest row block of B that circulates */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local rows of B, buf[1] the fully gathered B;
       recvdispls/recvcounts describe each rank's contribution for MPI_Allgatherv */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  Numeric phase of C = A*B^T using the "cyclic" algorithm: the packed local
  block of B^T circulates around the ranks in a ring; at each step every rank
  multiplies its local A block against the block currently held, overlapping
  the GEMM with the next nonblocking send/receive pair.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_MatTransMultDense *abt;
  MPI_Comm comm;
  PetscMPIInt rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar *sendbuf, *recvbuf = NULL, *cv;
  PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar _DOne = 1.0, _DZero = 0.0;
  const PetscScalar *av, *bv;
  PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
  MPI_Request reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (MatProductCtx_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local B block is stored contiguously (lda == local rows) we can send it in place,
     otherwise pack it densely into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next one */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block we currently hold originated on our own rank */
  for (i = 0; i < size; i++) {
    /* previous receive and send are complete here, so both buffers may be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so the GEMM input is never overwritten */
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column slice of C owned by the block's origin rank */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* all entries were produced locally, so assembly needs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2351: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2352: {
2353: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2354: MatProductCtx_MatTransMultDense *abt;
2355: MPI_Comm comm;
2356: PetscMPIInt size, ibn;
2357: PetscScalar *cv, *sendbuf, *recvbuf;
2358: const PetscScalar *av, *bv;
2359: PetscInt blda, i, cK = A->cmap->N, k, j, bn;
2360: PetscScalar _DOne = 1.0, _DZero = 0.0;
2361: PetscBLASInt cm, cn, ck, alda, clda;
2363: PetscFunctionBegin;
2364: MatCheckProduct(C, 3);
2365: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2366: abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2367: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2368: PetscCallMPI(MPI_Comm_size(comm, &size));
2369: PetscCall(MatDenseGetArrayRead(a->A, &av));
2370: PetscCall(MatDenseGetArrayRead(b->A, &bv));
2371: PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2372: PetscCall(MatDenseGetLDA(a->A, &i));
2373: PetscCall(PetscBLASIntCast(i, &alda));
2374: PetscCall(MatDenseGetLDA(b->A, &blda));
2375: PetscCall(MatDenseGetLDA(c->A, &i));
2376: PetscCall(PetscBLASIntCast(i, &clda));
2377: /* copy transpose of B into buf[0] */
2378: bn = B->rmap->n;
2379: sendbuf = abt->buf[0];
2380: recvbuf = abt->buf[1];
2381: for (k = 0, j = 0; j < bn; j++) {
2382: for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2383: }
2384: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2385: PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2386: PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2387: PetscCall(PetscBLASIntCast(cK, &ck));
2388: PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2389: PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2390: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2391: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2392: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2393: PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2394: PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2395: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2396: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2397: PetscFunctionReturn(PETSC_SUCCESS);
2398: }
2400: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2401: {
2402: MatProductCtx_MatTransMultDense *abt;
2404: PetscFunctionBegin;
2405: MatCheckProduct(C, 3);
2406: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2407: abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2408: switch (abt->alg) {
2409: case 1:
2410: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2411: break;
2412: default:
2413: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2414: break;
2415: }
2416: PetscFunctionReturn(PETSC_SUCCESS);
2417: }
2419: static PetscErrorCode MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)
2420: {
2421: MatProductCtx_MatMultDense *ab = *(MatProductCtx_MatMultDense **)data;
2423: PetscFunctionBegin;
2424: PetscCall(MatDestroy(&ab->Ce));
2425: PetscCall(MatDestroy(&ab->Ae));
2426: PetscCall(MatDestroy(&ab->Be));
2427: PetscCall(PetscFree(ab));
2428: PetscFunctionReturn(PETSC_SUCCESS);
2429: }
/*
  Numeric phase of C = A*B for two MATMPIDENSE matrices.

  Two paths: the Elemental path (context holds Ae/Ce mirrors) converts both
  operands and multiplies in Elemental; the default PETSc path broadcasts
  each rank's slice of B into the replicated Be via the Mvctx PetscSF and
  then performs a local sequential dense multiply.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  MatProductCtx_MatMultDense *ab;
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense *b = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (MatProductCtx_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* Elemental path: convert operands in place into the cached mirrors and multiply there */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm comm;
    const PetscScalar *read;
    PetscScalar *write;
    PetscInt lda;
    const PetscInt *ranges;
    PetscMPIInt size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      // (rank 0 owns all rows, so a single SF broadcast of contiguous data suffices)
      PetscInt b_lda, be_lda;
      Mat b_local = b->A;
      Mat b_alloc = NULL;
      Mat be_local = ab->Be;
      Mat be_alloc = NULL;
      PetscMemType b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype vector_type;
      PetscScalar *be_array = NULL;
      PetscMPIInt rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* both sides of the broadcast must be contiguous (lda == global row count);
         otherwise stage through a freshly allocated duplicate */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* broadcast whole rows (N scalars each) as one MPI datatype when B has multiple columns */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      /* memtype-aware broadcast so device-resident data is handled without extra host staging */
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B column by column through the SF */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local multiply of A's sequential block against the replicated Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2524: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2525: {
2526: Mat_Product *product = C->product;
2527: PetscInt alg;
2528: MatProductCtx_MatMultDense *ab;
2529: PetscBool flg;
2531: PetscFunctionBegin;
2532: MatCheckProduct(C, 4);
2533: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2534: /* check local size of A and B */
2535: PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2536: A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2538: PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2539: alg = flg ? 0 : 1;
2541: /* setup C */
2542: PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2543: PetscCall(MatSetType(C, MATMPIDENSE));
2544: PetscCall(MatSetUp(C));
2546: /* create data structure for reuse Cdense */
2547: PetscCall(PetscNew(&ab));
2549: switch (alg) {
2550: case 1: /* alg: "elemental" */
2551: #if PetscDefined(HAVE_ELEMENTAL)
2552: /* create elemental matrices Ae and Be */
2553: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2554: PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2555: PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2556: PetscCall(MatSetUp(ab->Ae));
2557: PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2559: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2560: PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2561: PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2562: PetscCall(MatSetUp(ab->Be));
2563: PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2565: /* compute symbolic Ce = Ae*Be */
2566: PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2567: PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2568: #else
2569: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2570: #endif
2571: break;
2572: default: /* alg: "petsc" */
2573: ab->Ae = NULL;
2574: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2575: ab->Ce = NULL;
2576: break;
2577: }
2579: C->product->data = ab;
2580: C->product->destroy = MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense;
2581: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2582: PetscFunctionReturn(PETSC_SUCCESS);
2583: }
2585: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2586: {
2587: Mat_Product *product = C->product;
2588: const char *algTypes[2] = {"petsc", "elemental"};
2589: PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2590: PetscBool flg = PETSC_FALSE;
2592: PetscFunctionBegin;
2593: /* Set default algorithm */
2594: alg = 0; /* default is PETSc */
2595: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2596: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2598: /* Get runtime option */
2599: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2600: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2601: PetscOptionsEnd();
2602: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2604: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2605: C->ops->productsymbolic = MatProductSymbolic_AB;
2606: PetscFunctionReturn(PETSC_SUCCESS);
2607: }
2609: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2610: {
2611: Mat_Product *product = C->product;
2612: Mat A = product->A, B = product->B;
2614: PetscFunctionBegin;
2615: PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2616: A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2617: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2618: C->ops->productsymbolic = MatProductSymbolic_AtB;
2619: PetscFunctionReturn(PETSC_SUCCESS);
2620: }
2622: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2623: {
2624: Mat_Product *product = C->product;
2625: const char *algTypes[2] = {"allgatherv", "cyclic"};
2626: PetscInt alg, nalg = 2;
2627: PetscBool flg = PETSC_FALSE;
2629: PetscFunctionBegin;
2630: /* Set default algorithm */
2631: alg = 0; /* default is allgatherv */
2632: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2633: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2635: /* Get runtime option */
2636: if (product->api_user) {
2637: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2638: PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2639: PetscOptionsEnd();
2640: } else {
2641: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2642: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2643: PetscOptionsEnd();
2644: }
2645: if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2647: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2648: C->ops->productsymbolic = MatProductSymbolic_ABt;
2649: PetscFunctionReturn(PETSC_SUCCESS);
2650: }
2652: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2653: {
2654: Mat_Product *product = C->product;
2656: PetscFunctionBegin;
2657: switch (product->type) {
2658: case MATPRODUCT_AB:
2659: PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2660: break;
2661: case MATPRODUCT_AtB:
2662: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2663: break;
2664: case MATPRODUCT_ABt:
2665: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2666: break;
2667: default:
2668: break;
2669: }
2670: PetscFunctionReturn(PETSC_SUCCESS);
2671: }
/*
  Scatter/gather between two dense matrices X and Y through the row map
  encoded in the PetscSF sf.

  mode selects the combine operation (INSERT/ADD/MAX/MIN_VALUES); smode
  selects direction (SCATTER_FORWARD broadcasts root->leaf, SCATTER_REVERSE
  reduces leaf->root). For multi-column matrices a strided SF derived from
  sf (accounting for both leading dimensions) is built once and cached on
  sf so repeated scatters reuse it.
*/
PetscErrorCode MatDenseScatter_Private(PetscSF sf, Mat X, Mat Y, InsertMode mode, ScatterMode smode)
{
  const PetscScalar *in;
  PetscScalar *out;
  PetscSF vsf;
  PetscInt N, ny, rld, lld;
  PetscMemType mtype[2];
  MPI_Op op = MPI_OP_NULL;

  PetscFunctionBegin;
  /* map the InsertMode to the MPI reduction op; anything else is unsupported */
  if (mode == INSERT_VALUES) op = MPI_REPLACE;
  else if (mode == ADD_VALUES) op = MPIU_SUM;
  else if (mode == MAX_VALUES) op = MPIU_MAX;
  else if (mode == MIN_VALUES) op = MPIU_MIN;
  PetscCheck(op != MPI_OP_NULL, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in MatDenseScatter_Private()", mode);
  PetscCheck(smode == SCATTER_FORWARD || smode == SCATTER_REVERSE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported ScatterMode %d in MatDenseScatter_Private()", smode);
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatGetSize(Y, NULL, &ny));
  PetscCheck(N == ny, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_SIZ, "Matrix column sizes must match: %" PetscInt_FMT " != %" PetscInt_FMT, N, ny);
  PetscCall(MatDenseGetLDA(X, &rld));
  PetscCall(MatDenseGetLDA(Y, &lld));
  /* get cached or create new strided PetscSF when the number of columns is greater than one */
  if (N > 1) {
    PetscCall(PetscObjectQuery((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject *)&vsf));
    if (vsf) {
      PetscInt nr[2], nl[2];

      /* invalidate the cache if the cached SF's graph no longer matches N times sf's graph
         (e.g. the matrices changed column count since the SF was cached) */
      PetscCall(PetscSFGetGraph(sf, nr, nl, NULL, NULL));
      PetscCall(PetscSFGetGraph(vsf, nr + 1, nl + 1, NULL, NULL));
      if (N * nr[0] != nr[1] || N * nl[0] != nl[1]) vsf = NULL;
    }
    if (!vsf) {
      PetscCall(PetscSFCreateStridedSF(sf, N, rld, lld, &vsf));
      PetscCall(PetscObjectCompose((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject)vsf));
      /* composition took a reference; drop ours so sf owns the cached SF */
      PetscCall(PetscObjectDereference((PetscObject)vsf));
    }
  } else vsf = sf;
  /* the output array is accessed in read and write mode,
     but write-only in the INSERT_VALUES case could be worth exploring */
  PetscCall(MatDenseGetArrayReadAndMemType(X, &in, &mtype[0]));
  PetscCall(MatDenseGetArrayAndMemType(Y, &out, &mtype[1]));
  if (smode == SCATTER_FORWARD) {
    PetscCall(PetscSFBcastWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFBcastEnd(vsf, vsf->vscat.unit, in, out, op));
  } else {
    PetscCall(PetscSFReduceWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFReduceEnd(vsf, vsf->vscat.unit, in, out, op));
  }
  PetscCall(MatDenseRestoreArrayAndMemType(Y, &out));
  PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
  PetscFunctionReturn(PETSC_SUCCESS);
}