Actual source code: mpidense.c

  1: /*
  2:    Basic functions for parallel dense matrices.
  3:    Portions of this code are under:
  4:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  5: */

  7: #include <../src/mat/impls/dense/mpi/mpidense.h>
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9: #include <petscblaslapack.h>
 10: #include <petsc/private/vecimpl.h>

 12: /*@
 13:   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
 14:   matrix that stores the locally owned rows of the operator. For a sequential matrix it returns the matrix itself.

 16:   Input Parameter:
 17: . A - the sequential or MPI `MATDENSE` matrix

 19:   Output Parameter:
 20: . B - the inner sequential matrix (for a sequential matrix this is `A` itself)

 22:   Level: intermediate

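  Example Usage:
  A minimal sketch; `mat` here stands for any assembled matrix of type `MATMPIDENSE` or `MATSEQDENSE`. The
  returned matrix references `mat`'s internal storage (it is not a copy) and must not be destroyed by the caller.
.vb
  Mat lmat;

  PetscCall(MatDenseGetLocalMatrix(mat, &lmat));
  /* lmat is the sequential dense matrix holding the locally owned rows of mat */
.ve
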
 24: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 25: @*/
 26: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 27: {
 28:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 29:   PetscBool     flg;

 31:   PetscFunctionBegin;
 32:   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
 33:   PetscAssertPointer(B, 2);
 34:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
 35:   if (flg) *B = mat->A;
 36:   else {
 37:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
 38:     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
 39:     *B = A;
 40:   }
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }

 44: static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 45: {
 46:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 47:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 49:   PetscFunctionBegin;
 50:   PetscCall(MatCopy(Amat->A, Bmat->A, s));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }

 54: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 55: {
 56:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 57:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 58:   PetscScalar  *v;

 60:   PetscFunctionBegin;
 61:   PetscCall(MatDenseGetArray(mat->A, &v));
 62:   PetscCall(MatDenseGetLDA(mat->A, &lda));
 63:   rend2 = PetscMin(rend, A->cmap->N);
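       /* the local block stores all N columns of the owned rows [rstart, rend), so global diagonal
          entry (j, j) sits at local offset (j - rstart) + j*lda; rend2 clamps the loop at cmap->N
          so that rectangular matrices are handled correctly */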
 64:   if (rend2 > rstart) {
 65:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 66:     PetscCall(PetscLogFlops(rend2 - rstart));
 67:   }
 68:   PetscCall(MatDenseRestoreArray(mat->A, &v));
 69:   PetscFunctionReturn(PETSC_SUCCESS);
 70: }

 72: static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 73: {
 74:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 75:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 77:   PetscFunctionBegin;
 78:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 79:   lrow = row - rstart;
 80:   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }

 84: static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 85: {
 86:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 87:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 89:   PetscFunctionBegin;
 90:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 91:   lrow = row - rstart;
 92:   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }

 96: static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
 97: {
 98:   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
 99:   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100:   PetscScalar  *array;
101:   MPI_Comm      comm;
102:   PetscBool     flg;
103:   Mat           B;

105:   PetscFunctionBegin;
106:   PetscCall(MatHasCongruentLayouts(A, &flg));
107:   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108:   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110: #if PetscDefined(HAVE_CUDA)
111:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113: #elif PetscDefined(HAVE_HIP)
114:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116: #endif
117:     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
118:     PetscCall(MatCreate(comm, &B));
119:     PetscCall(MatSetSizes(B, m, m, m, m));
120:     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121:     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
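         /* B shares storage with the local block: the diagonal block begins at global column rstart,
            i.e. at offset m*rstart in the column-major local array */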
122:     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123:     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124:     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125:     *a = B;
126:     PetscCall(MatDestroy(&B));
127:   } else *a = B;
128:   PetscFunctionReturn(PETSC_SUCCESS);
129: }

131: static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132: {
133:   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135:   PetscBool     roworiented = A->roworiented;

137:   PetscFunctionBegin;
138:   for (i = 0; i < m; i++) {
139:     if (idxm[i] < 0) continue;
140:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141:     if (idxm[i] >= rstart && idxm[i] < rend) {
142:       row = idxm[i] - rstart;
143:       if (roworiented) {
144:         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
145:       } else {
146:         for (j = 0; j < n; j++) {
147:           if (idxn[j] < 0) continue;
148:           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149:           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
150:         }
151:       }
152:     } else if (!A->donotstash) {
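           /* this row is owned by another process: stash the values locally; they are sent to the
              owning rank during MatAssemblyBegin()/MatAssemblyEnd() */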
153:       mat->assembled = PETSC_FALSE;
154:       if (roworiented) {
155:         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
156:       } else {
157:         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
158:       }
159:     }
160:   }
161:   PetscFunctionReturn(PETSC_SUCCESS);
162: }

164: static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165: {
166:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

169:   PetscFunctionBegin;
170:   for (i = 0; i < m; i++) {
171:     if (idxm[i] < 0) continue; /* negative row */
172:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173:     if (idxm[i] >= rstart && idxm[i] < rend) {
174:       row = idxm[i] - rstart;
175:       for (j = 0; j < n; j++) {
176:         if (idxn[j] < 0) continue; /* negative column */
177:         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178:         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179:       }
180:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181:   }
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186: {
187:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

189:   PetscFunctionBegin;
190:   PetscCall(MatDenseGetLDA(a->A, lda));
191:   PetscFunctionReturn(PETSC_SUCCESS);
192: }

194: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195: {
196:   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197:   MatType       mtype = MATSEQDENSE;

199:   PetscFunctionBegin;
200:   if (!a->A) {
201:     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202:     PetscCall(PetscLayoutSetUp(A->rmap));
203:     PetscCall(PetscLayoutSetUp(A->cmap));
204:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205:     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206: #if PetscDefined(HAVE_CUDA)
207:     PetscBool iscuda;
208:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209:     if (iscuda) mtype = MATSEQDENSECUDA;
210: #elif PetscDefined(HAVE_HIP)
211:     PetscBool iship;
212:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213:     if (iship) mtype = MATSEQDENSEHIP;
214: #endif
215:     PetscCall(MatSetType(a->A, mtype));
216:   }
217:   PetscCall(MatDenseSetLDA(a->A, lda));
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }

221: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222: {
223:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

225:   PetscFunctionBegin;
226:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227:   PetscCall(MatDenseGetArray(a->A, array));
228:   PetscFunctionReturn(PETSC_SUCCESS);
229: }

231: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232: {
233:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

235:   PetscFunctionBegin;
236:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237:   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242: {
243:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

245:   PetscFunctionBegin;
246:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247:   PetscCall(MatDenseGetArrayWrite(a->A, array));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252: {
253:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

255:   PetscFunctionBegin;
256:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258:   PetscCall(MatDensePlaceArray(a->A, array));
259:   PetscFunctionReturn(PETSC_SUCCESS);
260: }

262: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263: {
264:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

266:   PetscFunctionBegin;
267:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269:   PetscCall(MatDenseResetArray(a->A));
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274: {
275:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

277:   PetscFunctionBegin;
278:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280:   PetscCall(MatDenseReplaceArray(a->A, array));
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285: {
286:   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287:   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288:   const PetscInt    *irow, *icol;
289:   const PetscScalar *v;
290:   PetscScalar       *bv;
291:   Mat                newmat;
292:   IS                 iscol_local;
293:   MPI_Comm           comm_is, comm_mat;

295:   PetscFunctionBegin;
296:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297:   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298:   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

300:   PetscCall(ISAllGather(iscol, &iscol_local));
301:   PetscCall(ISGetIndices(isrow, &irow));
302:   PetscCall(ISGetIndices(iscol_local, &icol));
303:   PetscCall(ISGetLocalSize(isrow, &nrows));
304:   PetscCall(ISGetLocalSize(iscol, &ncols));
305:   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

 307:   /* No parallel redistribution is currently supported: the submatrix must use the same row
 308:      partitioning as the original matrix. Each index set should really be checked here to
 309:      confirm that this holds. */

311:   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

314:   /* Check submatrix call */
315:   if (scall == MAT_REUSE_MATRIX) {
316:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
 317:     /* TODO: validate that the reused matrix has the correct row and column sizes */
318:     newmat = *B;
319:   } else {
320:     /* Create and fill new matrix */
321:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322:     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323:     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324:     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325:   }

327:   /* Now extract the data pointers and do the copy, column at a time */
328:   newmatd = (Mat_MPIDense *)newmat->data;
329:   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330:   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331:   PetscCall(MatDenseGetLDA(mat->A, &lda));
332:   for (i = 0; i < Ncols; i++) {
333:     const PetscScalar *av = v + lda * icol[i];
334:     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335:   }
336:   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337:   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

339:   /* Assemble the matrices so that the correct flags are set */
340:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

343:   /* Free work space */
344:   PetscCall(ISRestoreIndices(isrow, &irow));
345:   PetscCall(ISRestoreIndices(iscol_local, &icol));
346:   PetscCall(ISDestroy(&iscol_local));
347:   *B = newmat;
348:   PetscFunctionReturn(PETSC_SUCCESS);
349: }

351: static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352: {
353:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

355:   PetscFunctionBegin;
356:   PetscCall(MatDenseRestoreArray(a->A, array));
357:   PetscFunctionReturn(PETSC_SUCCESS);
358: }

360: static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361: {
362:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

364:   PetscFunctionBegin;
365:   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }

369: static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370: {
371:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

373:   PetscFunctionBegin;
374:   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375:   PetscFunctionReturn(PETSC_SUCCESS);
376: }

378: static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379: {
380:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381:   PetscInt      nstash, reallocs;

383:   PetscFunctionBegin;
384:   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

386:   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387:   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388:   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389:   PetscFunctionReturn(PETSC_SUCCESS);
390: }

392: static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393: {
394:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395:   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396:   PetscMPIInt   n;
397:   PetscScalar  *val;

399:   PetscFunctionBegin;
400:   if (!mdn->donotstash && !mat->nooffprocentries) {
401:     /*  wait on receives */
402:     while (1) {
403:       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404:       if (!flg) break;

406:       for (i = 0; i < n;) {
407:         /* Now identify the consecutive vals belonging to the same row */
408:         for (j = i, rstart = row[j]; j < n; j++) {
409:           if (row[j] != rstart) break;
410:         }
411:         if (j < n) ncols = j - i;
412:         else ncols = n - i;
413:         /* Now assemble all these values with a single function call */
414:         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415:         i = j;
416:       }
417:     }
418:     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419:   }

421:   PetscCall(MatAssemblyBegin(mdn->A, mode));
422:   PetscCall(MatAssemblyEnd(mdn->A, mode));
423:   PetscFunctionReturn(PETSC_SUCCESS);
424: }

426: static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427: {
428:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

430:   PetscFunctionBegin;
431:   PetscCall(MatZeroEntries(l->A));
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }

435: static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436: {
437:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438:   PetscInt      i, len, *lrows;

440:   PetscFunctionBegin;
441:   /* get locally owned rows */
442:   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443:   /* fix right-hand side if needed */
444:   if (x && b) {
445:     const PetscScalar *xx;
446:     PetscScalar       *bb;

448:     PetscCall(VecGetArrayRead(x, &xx));
449:     PetscCall(VecGetArrayWrite(b, &bb));
450:     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451:     PetscCall(VecRestoreArrayRead(x, &xx));
452:     PetscCall(VecRestoreArrayWrite(b, &bb));
453:   }
454:   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455:   if (diag != 0.0) {
456:     Vec d;

458:     PetscCall(MatCreateVecs(A, NULL, &d));
459:     PetscCall(VecSet(d, diag));
460:     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461:     PetscCall(VecDestroy(&d));
462:   }
463:   PetscCall(PetscFree(lrows));
464:   PetscFunctionReturn(PETSC_SUCCESS);
465: }

467: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

472: static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473: {
474:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475:   const PetscScalar *ax;
476:   PetscScalar       *ay;
477:   PetscMemType       axmtype, aymtype;

479:   PetscFunctionBegin;
480:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
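       /* each rank stores all N columns of its owned rows, so the distributed input vector is first
          gathered into the length-N work vector lvec with the PetscSF broadcast; the local dense
          multiply then produces the owned part of yy */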
481:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487:   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488:   PetscFunctionReturn(PETSC_SUCCESS);
489: }

491: static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
492: {
493:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494:   const PetscScalar *ax;
495:   PetscScalar       *ay;
496:   PetscMemType       axmtype, aymtype;

498:   PetscFunctionBegin;
499:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506:   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }

510: static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
511: {
512:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
513:   const PetscScalar *ax;
514:   PetscScalar       *ay;
515:   PetscMemType       axmtype, aymtype;

517:   PetscFunctionBegin;
518:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
519:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
520:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
521:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
522:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
523:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
524:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
525:   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
530: {
531:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532:   const PetscScalar *ax;
533:   PetscScalar       *ay;
534:   PetscMemType       axmtype, aymtype;

536:   PetscFunctionBegin;
537:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538:   PetscCall(VecSet(yy, 0.0));
539:   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
540:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541:   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
542:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545:   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
546:   PetscFunctionReturn(PETSC_SUCCESS);
547: }

549: static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
550: {
551:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552:   const PetscScalar *ax;
553:   PetscScalar       *ay;
554:   PetscMemType       axmtype, aymtype;

556:   PetscFunctionBegin;
557:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
558:   PetscCall(VecSet(yy, 0.0));
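       /* the local (Hermitian) transpose applied to the owned rows yields a length-N partial result
          in lvec; the PetscSF reduce below sums these contributions from all ranks into the
          distributed output yy */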
559:   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
560:   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
561:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
562:   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
563:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
564:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
565:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
566:   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
567:   PetscFunctionReturn(PETSC_SUCCESS);
568: }

570: static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571: {
572:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573:   const PetscScalar *ax;
574:   PetscScalar       *ay;
575:   PetscMemType       axmtype, aymtype;

577:   PetscFunctionBegin;
578:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579:   PetscCall(VecCopy(yy, zz));
582:   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584:   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588:   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589:   PetscFunctionReturn(PETSC_SUCCESS);
590: }

592: static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
593: {
594:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
595:   const PetscScalar *ax;
596:   PetscScalar       *ay;
597:   PetscMemType       axmtype, aymtype;

599:   PetscFunctionBegin;
600:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
601:   PetscCall(VecCopy(yy, zz));
602:   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
603:   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
604:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
605:   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
606:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
607:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
608:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
609:   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
610:   PetscFunctionReturn(PETSC_SUCCESS);
611: }

613: static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
614: {
615:   PetscFunctionBegin;
616:   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
617:   PetscFunctionReturn(PETSC_SUCCESS);
618: }

620: static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
621: {
622:   PetscFunctionBegin;
623:   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
624:   PetscFunctionReturn(PETSC_SUCCESS);
625: }

627: static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
628: {
629:   PetscFunctionBegin;
630:   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
631:   PetscFunctionReturn(PETSC_SUCCESS);
632: }

634: static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
635: {
636:   PetscFunctionBegin;
637:   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
638:   PetscFunctionReturn(PETSC_SUCCESS);
639: }

641: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
642: {
643:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
644:   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
645:   PetscScalar       *x;
646:   const PetscScalar *av;

648:   PetscFunctionBegin;
649:   PetscCall(VecGetArray(v, &x));
650:   PetscCall(VecGetSize(v, &ng));
651:   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
652:   PetscCall(VecGetLocalSize(v, &nl));
653:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
654:   radd = A->rmap->rstart * m;
655:   PetscCall(MatDenseGetArrayRead(a->A, &av));
656:   PetscCall(MatDenseGetLDA(a->A, &lda));
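       /* the diagonal entry of global row rstart+i sits at local row i, global column rstart+i of
          the local dense block */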
657:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
658:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
659:   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
660:   PetscCall(VecRestoreArray(v, &x));
661:   PetscFunctionReturn(PETSC_SUCCESS);
662: }

664: static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665: {
666:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

668:   PetscFunctionBegin;
669:   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670:   PetscCall(MatStashDestroy_Private(&mat->stash));
671:   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672:   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673:   PetscCall(MatDestroy(&mdn->A));
674:   PetscCall(VecDestroy(&mdn->lvec));
675:   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676:   PetscCall(VecDestroy(&mdn->cvec));
677:   PetscCall(MatDestroy(&mdn->cmat));

679:   PetscCall(PetscFree(mat->data));
680:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

682:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695: #if defined(PETSC_HAVE_ELEMENTAL)
696:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697: #endif
698: #if defined(PETSC_HAVE_SCALAPACK)
699:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700: #endif
701:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704: #if defined(PETSC_HAVE_CUDA)
705:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723: #endif
724: #if defined(PETSC_HAVE_HIP)
725:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743: #endif
744:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

758:   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759:   PetscFunctionReturn(PETSC_SUCCESS);
760: }

762: #include <petscdraw.h>
763: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764: {
765:   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766:   PetscMPIInt       rank;
768:   PetscBool         iascii, isdraw;
769:   PetscViewer       sviewer;
770:   PetscViewerFormat format;

772:   PetscFunctionBegin;
773:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776:   if (iascii) {
778:     PetscCall(PetscViewerGetFormat(viewer, &format));
779:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780:       MatInfo info;
781:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784:                                                    (PetscInt)info.memory));
785:       PetscCall(PetscViewerFlush(viewer));
786:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787:       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788:       PetscFunctionReturn(PETSC_SUCCESS);
789:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790:       PetscFunctionReturn(PETSC_SUCCESS);
791:     }
792:   } else if (isdraw) {
793:     PetscDraw draw;
794:     PetscBool isnull;

796:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797:     PetscCall(PetscDrawIsNull(draw, &isnull));
798:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799:   }

801:   {
802:     /* assemble the entire matrix onto first processor. */
803:     Mat          A;
804:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805:     PetscInt    *cols;
806:     PetscScalar *vals;

808:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809:     if (rank == 0) {
810:       PetscCall(MatSetSizes(A, M, N, M, N));
811:     } else {
812:       PetscCall(MatSetSizes(A, 0, 0, M, N));
813:     }
 814:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
815:     PetscCall(MatSetType(A, MATMPIDENSE));
816:     PetscCall(MatMPIDenseSetPreallocation(A, NULL));

 818:     /* Copy the matrix row by row; this isn't the most efficient approach,
 819:        but it is adequate for viewing */
820:     A->insertmode = INSERT_VALUES;

822:     row = mat->rmap->rstart;
823:     m   = mdn->A->rmap->n;
824:     for (i = 0; i < m; i++) {
825:       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826:       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827:       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828:       row++;
829:     }

831:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834:     if (rank == 0) {
835:       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836:       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837:     }
838:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839:     PetscCall(MatDestroy(&A));
840:   }
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845: {
846:   PetscBool iascii, isbinary, isdraw, issocket;

848:   PetscFunctionBegin;
849:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

854:   if (iascii || issocket || isdraw) {
855:     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856:   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857:   PetscFunctionReturn(PETSC_SUCCESS);
858: }

860: static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861: {
862:   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863:   Mat            mdn = mat->A;
864:   PetscLogDouble isend[5], irecv[5];

866:   PetscFunctionBegin;
867:   info->block_size = 1.0;

869:   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

871:   isend[0] = info->nz_used;
872:   isend[1] = info->nz_allocated;
873:   isend[2] = info->nz_unneeded;
874:   isend[3] = info->memory;
875:   isend[4] = info->mallocs;
876:   if (flag == MAT_LOCAL) {
877:     info->nz_used      = isend[0];
878:     info->nz_allocated = isend[1];
879:     info->nz_unneeded  = isend[2];
880:     info->memory       = isend[3];
881:     info->mallocs      = isend[4];
882:   } else if (flag == MAT_GLOBAL_MAX) {
883:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

885:     info->nz_used      = irecv[0];
886:     info->nz_allocated = irecv[1];
887:     info->nz_unneeded  = irecv[2];
888:     info->memory       = irecv[3];
889:     info->mallocs      = irecv[4];
890:   } else if (flag == MAT_GLOBAL_SUM) {
891:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

893:     info->nz_used      = irecv[0];
894:     info->nz_allocated = irecv[1];
895:     info->nz_unneeded  = irecv[2];
896:     info->memory       = irecv[3];
897:     info->mallocs      = irecv[4];
898:   }
899:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900:   info->fill_ratio_needed = 0;
901:   info->factor_mallocs    = 0;
902:   PetscFunctionReturn(PETSC_SUCCESS);
903: }

905: static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
906: {
907:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

909:   PetscFunctionBegin;
910:   switch (op) {
911:   case MAT_NEW_NONZERO_LOCATIONS:
912:   case MAT_NEW_NONZERO_LOCATION_ERR:
913:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914:     MatCheckPreallocated(A, 1);
915:     PetscCall(MatSetOption(a->A, op, flg));
916:     break;
917:   case MAT_ROW_ORIENTED:
918:     MatCheckPreallocated(A, 1);
919:     a->roworiented = flg;
920:     PetscCall(MatSetOption(a->A, op, flg));
921:     break;
922:   case MAT_FORCE_DIAGONAL_ENTRIES:
923:   case MAT_KEEP_NONZERO_PATTERN:
924:   case MAT_USE_HASH_TABLE:
925:   case MAT_SORTED_FULL:
926:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
927:     break;
928:   case MAT_IGNORE_OFF_PROC_ENTRIES:
929:     a->donotstash = flg;
930:     break;
931:   case MAT_SYMMETRIC:
932:   case MAT_STRUCTURALLY_SYMMETRIC:
933:   case MAT_HERMITIAN:
934:   case MAT_SYMMETRY_ETERNAL:
935:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
936:   case MAT_SPD:
937:   case MAT_IGNORE_LOWER_TRIANGULAR:
938:   case MAT_IGNORE_ZERO_ENTRIES:
939:   case MAT_SPD_ETERNAL:
 940:     /* if the diagonal block is square it inherits some of the properties above */
941:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
942:     break;
943:   default:
944:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
945:   }
946:   PetscFunctionReturn(PETSC_SUCCESS);
947: }

949: static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
950: {
951:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
952:   const PetscScalar *l;
953:   PetscScalar        x, *v, *vv, *r;
954:   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

956:   PetscFunctionBegin;
957:   PetscCall(MatDenseGetArray(mdn->A, &vv));
958:   PetscCall(MatDenseGetLDA(mdn->A, &lda));
959:   PetscCall(MatGetLocalSize(A, &s2, &s3));
960:   if (ll) {
961:     PetscCall(VecGetLocalSize(ll, &s2a));
962:     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
963:     PetscCall(VecGetArrayRead(ll, &l));
964:     for (i = 0; i < m; i++) {
965:       x = l[i];
966:       v = vv + i;
967:       for (j = 0; j < n; j++) {
968:         (*v) *= x;
969:         v += lda;
970:       }
971:     }
972:     PetscCall(VecRestoreArrayRead(ll, &l));
973:     PetscCall(PetscLogFlops(1.0 * n * m));
974:   }
975:   if (rr) {
976:     const PetscScalar *ar;

978:     PetscCall(VecGetLocalSize(rr, &s3a));
979:     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
980:     PetscCall(VecGetArrayRead(rr, &ar));
981:     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
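         /* gather the full right-scaling vector into the length-N work vector lvec so this rank can
            scale every column of its owned rows */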
982:     PetscCall(VecGetArray(mdn->lvec, &r));
983:     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
984:     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
985:     PetscCall(VecRestoreArrayRead(rr, &ar));
986:     for (i = 0; i < n; i++) {
987:       x = r[i];
988:       v = vv + i * lda;
989:       for (j = 0; j < m; j++) (*v++) *= x;
990:     }
991:     PetscCall(VecRestoreArray(mdn->lvec, &r));
992:     PetscCall(PetscLogFlops(1.0 * n * m));
993:   }
994:   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
995:   PetscFunctionReturn(PETSC_SUCCESS);
996: }

998: static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
999: {
1000:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1001:   PetscInt           i, j;
1002:   PetscMPIInt        size, iN;
1003:   PetscReal          sum = 0.0;
1004:   const PetscScalar *av, *v;

1006:   PetscFunctionBegin;
1007:   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1008:   v = av;
1009:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1010:   if (size == 1) {
1011:     PetscCall(MatNorm(mdn->A, type, nrm));
1012:   } else {
1013:     if (type == NORM_FROBENIUS) {
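           /* accumulate the squared magnitudes of the locally stored entries, then sum across ranks
              and take the square root of the global result */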
1014:       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1015:         sum += PetscRealPart(PetscConj(*v) * (*v));
1016:         v++;
1017:       }
1018:       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1019:       *nrm = PetscSqrtReal(*nrm);
1020:       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1021:     } else if (type == NORM_1) {
1022:       PetscReal *tmp, *tmp2;
1023:       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
1024:       *nrm = 0.0;
1025:       v    = av;
1026:       for (j = 0; j < mdn->A->cmap->n; j++) {
1027:         for (i = 0; i < mdn->A->rmap->n; i++) {
1028:           tmp[j] += PetscAbsScalar(*v);
1029:           v++;
1030:         }
1031:       }
1032:       PetscCall(PetscMPIIntCast(A->cmap->N, &iN));
1033:       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1034:       for (j = 0; j < A->cmap->N; j++) {
1035:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
1036:       }
1037:       PetscCall(PetscFree2(tmp, tmp2));
1038:       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1039:     } else if (type == NORM_INFINITY) { /* max row norm */
1040:       PetscReal ntemp;
1041:       PetscCall(MatNorm(mdn->A, type, &ntemp));
1042:       PetscCallMPI(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1043:     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1044:   }
1045:   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1046:   PetscFunctionReturn(PETSC_SUCCESS);
1047: }

1049: static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1050: {
1051:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1052:   Mat           B;
1053:   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1054:   PetscInt      j, i, lda;
1055:   PetscScalar  *v;

1057:   PetscFunctionBegin;
1058:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1059:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1060:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1061:     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1062:     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1063:     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1064:   } else B = *matout;

1066:   m = a->A->rmap->n;
1067:   n = a->A->cmap->n;
1068:   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1069:   PetscCall(MatDenseGetLDA(a->A, &lda));
1070:   PetscCall(PetscMalloc1(m, &rwork));
1071:   for (i = 0; i < m; i++) rwork[i] = rstart + i;
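       /* insert column j of the local block as row j of B, at global columns rstart..rstart+m-1
          (the rows of A owned by this rank) */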
1072:   for (j = 0; j < n; j++) {
1073:     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1074:     v = PetscSafePointerPlusOffset(v, lda);
1075:   }
1076:   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1077:   PetscCall(PetscFree(rwork));
1078:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1079:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1080:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1081:     *matout = B;
1082:   } else {
1083:     PetscCall(MatHeaderMerge(A, &B));
1084:   }
1085:   PetscFunctionReturn(PETSC_SUCCESS);
1086: }

1088: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1089: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

1091: static PetscErrorCode MatSetUp_MPIDense(Mat A)
1092: {
1093:   PetscFunctionBegin;
1094:   PetscCall(PetscLayoutSetUp(A->rmap));
1095:   PetscCall(PetscLayoutSetUp(A->cmap));
1096:   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1097:   PetscFunctionReturn(PETSC_SUCCESS);
1098: }

1100: static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1101: {
1102:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

1104:   PetscFunctionBegin;
1105:   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1106:   PetscFunctionReturn(PETSC_SUCCESS);
1107: }

1109: static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1110: {
1111:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

1113:   PetscFunctionBegin;
1114:   PetscCall(MatConjugate(a->A));
1115:   PetscFunctionReturn(PETSC_SUCCESS);
1116: }

1118: static PetscErrorCode MatRealPart_MPIDense(Mat A)
1119: {
1120:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1122:   PetscFunctionBegin;
1123:   PetscCall(MatRealPart(a->A));
1124:   PetscFunctionReturn(PETSC_SUCCESS);
1125: }

1127: static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1128: {
1129:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1131:   PetscFunctionBegin;
1132:   PetscCall(MatImaginaryPart(a->A));
1133:   PetscFunctionReturn(PETSC_SUCCESS);
1134: }

1136: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1137: {
1138:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1140:   PetscFunctionBegin;
1141:   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1142:   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1143:   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1144:   PetscFunctionReturn(PETSC_SUCCESS);
1145: }

1147: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

1149: static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1150: {
1151:   PetscInt      i, m, n;
1152:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1153:   PetscReal    *work;
1154:   PetscMPIInt   in;

1156:   PetscFunctionBegin;
1157:   PetscCall(MatGetSize(A, &m, &n));
1158:   PetscCall(PetscMalloc1(n, &work));
1159:   if (type == REDUCTION_MEAN_REALPART) {
1160:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1161:   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1162:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1163:   } else {
1164:     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1165:   }
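       /* for the 2-norm the local column norms are squared so the reduction below sums squares; the
          square root is taken after the reduction */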
1166:   if (type == NORM_2) {
1167:     for (i = 0; i < n; i++) work[i] *= work[i];
1168:   }
1169:   PetscCall(PetscMPIIntCast(n, &in));
1170:   if (type == NORM_INFINITY) {
1171:     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1172:   } else {
1173:     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1174:   }
1175:   PetscCall(PetscFree(work));
1176:   if (type == NORM_2) {
1177:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1178:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1179:     for (i = 0; i < n; i++) reductions[i] /= m;
1180:   }
1181:   PetscFunctionReturn(PETSC_SUCCESS);
1182: }

1184: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1185: {
1186:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1188:   PetscFunctionBegin;
1189:   PetscCall(MatSetRandom(d->A, rctx));
1190: #if defined(PETSC_HAVE_DEVICE)
1191:   x->offloadmask = d->A->offloadmask;
1192: #endif
1193:   PetscFunctionReturn(PETSC_SUCCESS);
1194: }

1196: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1197: {
1198:   PetscFunctionBegin;
1199:   *missing = PETSC_FALSE;
1200:   PetscFunctionReturn(PETSC_SUCCESS);
1201: }

1203: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1204: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1205: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1206: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1207: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1208: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1209: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

1211: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1212:                                        MatGetRow_MPIDense,
1213:                                        MatRestoreRow_MPIDense,
1214:                                        MatMult_MPIDense,
1215:                                        /*  4*/ MatMultAdd_MPIDense,
1216:                                        MatMultTranspose_MPIDense,
1217:                                        MatMultTransposeAdd_MPIDense,
1218:                                        NULL,
1219:                                        NULL,
1220:                                        NULL,
1221:                                        /* 10*/ NULL,
1222:                                        NULL,
1223:                                        NULL,
1224:                                        NULL,
1225:                                        MatTranspose_MPIDense,
1226:                                        /* 15*/ MatGetInfo_MPIDense,
1227:                                        MatEqual_MPIDense,
1228:                                        MatGetDiagonal_MPIDense,
1229:                                        MatDiagonalScale_MPIDense,
1230:                                        MatNorm_MPIDense,
1231:                                        /* 20*/ MatAssemblyBegin_MPIDense,
1232:                                        MatAssemblyEnd_MPIDense,
1233:                                        MatSetOption_MPIDense,
1234:                                        MatZeroEntries_MPIDense,
1235:                                        /* 24*/ MatZeroRows_MPIDense,
1236:                                        NULL,
1237:                                        NULL,
1238:                                        NULL,
1239:                                        NULL,
1240:                                        /* 29*/ MatSetUp_MPIDense,
1241:                                        NULL,
1242:                                        NULL,
1243:                                        MatGetDiagonalBlock_MPIDense,
1244:                                        NULL,
1245:                                        /* 34*/ MatDuplicate_MPIDense,
1246:                                        NULL,
1247:                                        NULL,
1248:                                        NULL,
1249:                                        NULL,
1250:                                        /* 39*/ MatAXPY_MPIDense,
1251:                                        MatCreateSubMatrices_MPIDense,
1252:                                        NULL,
1253:                                        MatGetValues_MPIDense,
1254:                                        MatCopy_MPIDense,
1255:                                        /* 44*/ NULL,
1256:                                        MatScale_MPIDense,
1257:                                        MatShift_MPIDense,
1258:                                        NULL,
1259:                                        NULL,
1260:                                        /* 49*/ MatSetRandom_MPIDense,
1261:                                        NULL,
1262:                                        NULL,
1263:                                        NULL,
1264:                                        NULL,
1265:                                        /* 54*/ NULL,
1266:                                        NULL,
1267:                                        NULL,
1268:                                        NULL,
1269:                                        NULL,
1270:                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1271:                                        MatDestroy_MPIDense,
1272:                                        MatView_MPIDense,
1273:                                        NULL,
1274:                                        NULL,
1275:                                        /* 64*/ NULL,
1276:                                        NULL,
1277:                                        NULL,
1278:                                        NULL,
1279:                                        NULL,
1280:                                        /* 69*/ NULL,
1281:                                        NULL,
1282:                                        NULL,
1283:                                        NULL,
1284:                                        NULL,
1285:                                        /* 74*/ NULL,
1286:                                        NULL,
1287:                                        NULL,
1288:                                        NULL,
1289:                                        NULL,
1290:                                        /* 79*/ NULL,
1291:                                        NULL,
1292:                                        NULL,
1293:                                        NULL,
1294:                                        /* 83*/ MatLoad_MPIDense,
1295:                                        NULL,
1296:                                        NULL,
1297:                                        NULL,
1298:                                        NULL,
1299:                                        NULL,
1300:                                        /* 89*/ NULL,
1301:                                        NULL,
1302:                                        NULL,
1303:                                        NULL,
1304:                                        NULL,
1305:                                        /* 94*/ NULL,
1306:                                        NULL,
1307:                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1308:                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1309:                                        NULL,
1310:                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1311:                                        NULL,
1312:                                        NULL,
1313:                                        MatConjugate_MPIDense,
1314:                                        NULL,
1315:                                        /*104*/ NULL,
1316:                                        MatRealPart_MPIDense,
1317:                                        MatImaginaryPart_MPIDense,
1318:                                        NULL,
1319:                                        NULL,
1320:                                        /*109*/ NULL,
1321:                                        NULL,
1322:                                        NULL,
1323:                                        MatGetColumnVector_MPIDense,
1324:                                        MatMissingDiagonal_MPIDense,
1325:                                        /*114*/ NULL,
1326:                                        NULL,
1327:                                        NULL,
1328:                                        NULL,
1329:                                        NULL,
1330:                                        /*119*/ NULL,
1331:                                        NULL,
1332:                                        MatMultHermitianTranspose_MPIDense,
1333:                                        MatMultHermitianTransposeAdd_MPIDense,
1334:                                        NULL,
1335:                                        /*124*/ NULL,
1336:                                        MatGetColumnReductions_MPIDense,
1337:                                        NULL,
1338:                                        NULL,
1339:                                        NULL,
1340:                                        /*129*/ NULL,
1341:                                        NULL,
1342:                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1343:                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1344:                                        NULL,
1345:                                        /*134*/ NULL,
1346:                                        NULL,
1347:                                        NULL,
1348:                                        NULL,
1349:                                        NULL,
1350:                                        /*139*/ NULL,
1351:                                        NULL,
1352:                                        NULL,
1353:                                        NULL,
1354:                                        NULL,
1355:                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1356:                                        /*145*/ NULL,
1357:                                        NULL,
1358:                                        NULL,
1359:                                        NULL,
1360:                                        NULL,
1361:                                        /*150*/ NULL,
1362:                                        NULL,
1363:                                        NULL,
1364:                                        NULL,
1365:                                        NULL,
1366:                                        NULL};

1368: static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1369: {
1370:   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1371:   MatType       mtype = MATSEQDENSE;

1373:   PetscFunctionBegin;
1374:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1375:   PetscCall(PetscLayoutSetUp(mat->rmap));
1376:   PetscCall(PetscLayoutSetUp(mat->cmap));
1377:   if (!a->A) {
1378:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1379:     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1380:   }
1381: #if defined(PETSC_HAVE_CUDA)
1382:   PetscBool iscuda;
1383:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1384:   if (iscuda) mtype = MATSEQDENSECUDA;
1385: #endif
1386: #if defined(PETSC_HAVE_HIP)
1387:   PetscBool iship;
1388:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1389:   if (iship) mtype = MATSEQDENSEHIP;
1390: #endif
1391:   PetscCall(MatSetType(a->A, mtype));
1392:   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1393: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1394:   mat->offloadmask = a->A->offloadmask;
1395: #endif
1396:   mat->preallocated = PETSC_TRUE;
1397:   mat->assembled    = PETSC_TRUE;
1398:   PetscFunctionReturn(PETSC_SUCCESS);
1399: }

1401: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1402: {
1403:   Mat B, C;

1405:   PetscFunctionBegin;
1406:   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1407:   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1408:   PetscCall(MatDestroy(&C));
1409:   if (reuse == MAT_REUSE_MATRIX) {
1410:     C = *newmat;
1411:   } else C = NULL;
1412:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1413:   PetscCall(MatDestroy(&B));
1414:   if (reuse == MAT_INPLACE_MATRIX) {
1415:     PetscCall(MatHeaderReplace(A, &C));
1416:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1417:   PetscFunctionReturn(PETSC_SUCCESS);
1418: }

1420: static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1421: {
1422:   Mat B, C;

1424:   PetscFunctionBegin;
1425:   PetscCall(MatDenseGetLocalMatrix(A, &C));
1426:   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1427:   if (reuse == MAT_REUSE_MATRIX) {
1428:     C = *newmat;
1429:   } else C = NULL;
1430:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1431:   PetscCall(MatDestroy(&B));
1432:   if (reuse == MAT_INPLACE_MATRIX) {
1433:     PetscCall(MatHeaderReplace(A, &C));
1434:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1435:   PetscFunctionReturn(PETSC_SUCCESS);
1436: }

1438: #if defined(PETSC_HAVE_ELEMENTAL)
1439: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1440: {
1441:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1442:   Mat           mat_elemental;
1443:   PetscScalar  *v;
1444:   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

1446:   PetscFunctionBegin;
1447:   if (reuse == MAT_REUSE_MATRIX) {
1448:     mat_elemental = *newmat;
1449:     PetscCall(MatZeroEntries(*newmat));
1450:   } else {
1451:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1452:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1453:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1454:     PetscCall(MatSetUp(mat_elemental));
1455:     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1456:   }

1458:   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1459:   for (i = 0; i < N; i++) cols[i] = i;
1460:   for (i = 0; i < m; i++) rows[i] = rstart + i;

1462:   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1463:   PetscCall(MatDenseGetArray(A, &v));
1464:   PetscCall(MatDenseGetLDA(a->A, &lda));
1465:   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1466:   else {
1467:     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1468:   }
1469:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1470:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1471:   PetscCall(MatDenseRestoreArray(A, &v));
1472:   PetscCall(PetscFree2(rows, cols));

1474:   if (reuse == MAT_INPLACE_MATRIX) {
1475:     PetscCall(MatHeaderReplace(A, &mat_elemental));
1476:   } else {
1477:     *newmat = mat_elemental;
1478:   }
1479:   PetscFunctionReturn(PETSC_SUCCESS);
1480: }
1481: #endif

1483: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1484: {
1485:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1487:   PetscFunctionBegin;
1488:   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1489:   PetscFunctionReturn(PETSC_SUCCESS);
1490: }

1492: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1493: {
1494:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1496:   PetscFunctionBegin;
1497:   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1498:   PetscFunctionReturn(PETSC_SUCCESS);
1499: }

1501: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1502: {
1503:   Mat_MPIDense *mat;
1504:   PetscInt      m, nloc, N;

1506:   PetscFunctionBegin;
1507:   PetscCall(MatGetSize(inmat, &m, &N));
1508:   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1509:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1510:     PetscInt sum;

1512:     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1513:     /* Check sum(n) = N */
1514:     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1515:     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

1517:     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1518:     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1519:   }

1521:   /* numeric phase */
1522:   mat = (Mat_MPIDense *)(*outmat)->data;
1523:   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1524:   PetscFunctionReturn(PETSC_SUCCESS);
1525: }

1527: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1528: {
1529:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1530:   PetscInt      lda;

1532:   PetscFunctionBegin;
1533:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1534:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1535:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1536:   a->vecinuse = col + 1;
1537:   PetscCall(MatDenseGetLDA(a->A, &lda));
1538:   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1539:   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1540:   *v = a->cvec;
1541:   PetscFunctionReturn(PETSC_SUCCESS);
1542: }

1544: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1545: {
1546:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1548:   PetscFunctionBegin;
1549:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1550:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1551:   VecCheckAssembled(a->cvec);
1552:   a->vecinuse = 0;
1553:   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1554:   PetscCall(VecResetArray(a->cvec));
1555:   if (v) *v = NULL;
1556:   PetscFunctionReturn(PETSC_SUCCESS);
1557: }

1559: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1560: {
1561:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1562:   PetscInt      lda;

1564:   PetscFunctionBegin;
1565:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1566:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1567:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1568:   a->vecinuse = col + 1;
1569:   PetscCall(MatDenseGetLDA(a->A, &lda));
1570:   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1571:   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1572:   PetscCall(VecLockReadPush(a->cvec));
1573:   *v = a->cvec;
1574:   PetscFunctionReturn(PETSC_SUCCESS);
1575: }

1577: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1578: {
1579:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1581:   PetscFunctionBegin;
1582:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1583:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1584:   VecCheckAssembled(a->cvec);
1585:   a->vecinuse = 0;
1586:   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1587:   PetscCall(VecLockReadPop(a->cvec));
1588:   PetscCall(VecResetArray(a->cvec));
1589:   if (v) *v = NULL;
1590:   PetscFunctionReturn(PETSC_SUCCESS);
1591: }

1593: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1594: {
1595:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1596:   PetscInt      lda;

1598:   PetscFunctionBegin;
1599:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1600:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1601:   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1602:   a->vecinuse = col + 1;
1603:   PetscCall(MatDenseGetLDA(a->A, &lda));
1604:   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1605:   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1606:   *v = a->cvec;
1607:   PetscFunctionReturn(PETSC_SUCCESS);
1608: }

1610: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1611: {
1612:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1614:   PetscFunctionBegin;
1615:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1616:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1617:   VecCheckAssembled(a->cvec);
1618:   a->vecinuse = 0;
1619:   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1620:   PetscCall(VecResetArray(a->cvec));
1621:   if (v) *v = NULL;
1622:   PetscFunctionReturn(PETSC_SUCCESS);
1623: }

1625: static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1626: {
1627:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1628:   Mat_MPIDense *c;
1629:   MPI_Comm      comm;
1630:   PetscInt      pbegin, pend;

1632:   PetscFunctionBegin;
1633:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1634:   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1635:   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1636:   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1637:   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1638:   if (!a->cmat) {
1639:     PetscCall(MatCreate(comm, &a->cmat));
1640:     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1641:     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1642:     else {
1643:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1644:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1645:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1646:     }
1647:     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1648:     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1649:   } else {
1650:     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1651:     if (same && a->cmat->rmap->N != A->rmap->N) {
1652:       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1653:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1654:     }
1655:     if (!same) {
1656:       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1657:       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1658:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1659:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1660:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1661:     }
1662:     if (cend - cbegin != a->cmat->cmap->N) {
1663:       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1664:       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1665:       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1666:       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1667:     }
1668:   }
1669:   c = (Mat_MPIDense *)a->cmat->data;
1670:   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1671:   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

1673:   a->cmat->preallocated = PETSC_TRUE;
1674:   a->cmat->assembled    = PETSC_TRUE;
1675: #if defined(PETSC_HAVE_DEVICE)
1676:   a->cmat->offloadmask = c->A->offloadmask;
1677: #endif
1678:   a->matinuse = cbegin + 1;
1679:   *v          = a->cmat;
1680:   PetscFunctionReturn(PETSC_SUCCESS);
1681: }

1683: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1684: {
1685:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1686:   Mat_MPIDense *c;

1688:   PetscFunctionBegin;
1689:   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1690:   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1691:   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1692:   a->matinuse = 0;
1693:   c           = (Mat_MPIDense *)a->cmat->data;
1694:   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1695:   if (v) *v = NULL;
1696: #if defined(PETSC_HAVE_DEVICE)
1697:   A->offloadmask = a->A->offloadmask;
1698: #endif
1699:   PetscFunctionReturn(PETSC_SUCCESS);
1700: }

1702: /*MC
1703:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1705:    Options Database Key:
1706: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

1708:   Level: beginner

1710: .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1711: M*/
1712: PetscErrorCode MatCreate_MPIDense(Mat mat)
1713: {
1714:   Mat_MPIDense *a;

1716:   PetscFunctionBegin;
1717:   PetscCall(PetscNew(&a));
1718:   mat->data   = (void *)a;
1719:   mat->ops[0] = MatOps_Values;

1721:   mat->insertmode = NOT_SET_VALUES;

1723:   /* build cache (stash) for matrix entries destined for other MPI ranks */
1724:   a->donotstash = PETSC_FALSE;

1726:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

1728:   /* data structures used for matrix-vector multiply */
1729:   a->lvec        = NULL;
1730:   a->Mvctx       = NULL;
1731:   a->roworiented = PETSC_TRUE;

1733:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1734:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1735:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1736:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1737:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1738:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1739:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1740:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1741:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1742:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1743:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1744:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1745:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1746:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1747:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1748:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1749:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1750:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1751:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1752:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1753:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1754: #if defined(PETSC_HAVE_ELEMENTAL)
1755:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1756: #endif
1757: #if defined(PETSC_HAVE_SCALAPACK)
1758:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1759: #endif
1760: #if defined(PETSC_HAVE_CUDA)
1761:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1762: #endif
1763:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1764:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1765:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1766: #if defined(PETSC_HAVE_CUDA)
1767:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1768:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1769: #endif
1770: #if defined(PETSC_HAVE_HIP)
1771:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1772:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1773:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1774: #endif
1775:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1776:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1777:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1778:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1779:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1780:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1781:   PetscFunctionReturn(PETSC_SUCCESS);
1782: }

1784: /*MC
1785:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1787:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1788:    and `MATMPIDENSE` otherwise.

1790:    Options Database Key:
1791: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

1793:   Level: beginner

1795: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1796: M*/
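/*
  Illustrative usage sketch (editorial addition, not part of the PETSc source): selecting the
  dense type at runtime. On a one-process communicator this yields MATSEQDENSE, otherwise
  MATMPIDENSE; the sizes below are placeholders.

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetType(A, MATDENSE));   // or MatSetFromOptions() together with -mat_type dense
    PetscCall(MatSetUp(A));
*/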

1798: /*@
1799:   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1801:   Collective

1803:   Input Parameters:
1804: + B    - the matrix
1805: - data - optional location of matrix data.  Set to `NULL` for PETSc
1806:          to control all matrix memory allocation.

1808:   Level: intermediate

1810:   Notes:
1811:   The dense format is fully compatible with standard Fortran
1812:   storage by columns.

1814:   The data input variable is intended primarily for Fortran programmers
1815:   who wish to allocate their own matrix memory space.  Most users should
1816:   set `data` to `NULL`.

1818: .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1819: @*/
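/*
  Illustrative usage sketch (editorial addition, not part of the PETSc source): a typical
  preallocation sequence for a MATMPIDENSE matrix with PETSc-managed storage; communicator
  and sizes are placeholders.

    Mat B;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
    PetscCall(MatSetSizes(B, PETSC_DECIDE, PETSC_DECIDE, 100, 50));
    PetscCall(MatSetType(B, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));   // NULL: PETSc allocates the storage
*/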
1820: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1821: {
1822:   PetscFunctionBegin;
1824:   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1825:   PetscFunctionReturn(PETSC_SUCCESS);
1826: }

1828: /*@
1829:   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1830:   array provided by the user. This is useful to avoid copying an array
1831:   into a matrix.

1833:   Not Collective

1835:   Input Parameters:
1836: + mat   - the matrix
1837: - array - the array in column major order

1839:   Level: developer

1841:   Note:
1842:   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1843:   freed when the matrix is destroyed.

1845: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1846:           `MatDenseReplaceArray()`
1847: @*/
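/*
  Illustrative usage sketch (editorial addition, not part of the PETSc source): temporarily
  backing a MATDENSE matrix A with a user-owned column-major buffer and restoring the
  original storage afterwards. Here m is the number of local rows and N the global number of
  columns, both assumed to match the layout of A.

    PetscScalar *buf;
    PetscCall(PetscMalloc1((size_t)m * N, &buf));   // user-managed memory, freed by the user
    PetscCall(MatDensePlaceArray(A, buf));
    // ... use A; its entries now live in buf ...
    PetscCall(MatDenseResetArray(A));               // A points at its original array again
    PetscCall(PetscFree(buf));
*/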
1848: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1849: {
1850:   PetscFunctionBegin;
1852:   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1853:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1854: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1855:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1856: #endif
1857:   PetscFunctionReturn(PETSC_SUCCESS);
1858: }

1860: /*@
1861:   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

1863:   Not Collective

1865:   Input Parameter:
1866: . mat - the matrix

1868:   Level: developer

1870:   Note:
1871:   You can only call this after a call to `MatDensePlaceArray()`

1873: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1874: @*/
1875: PetscErrorCode MatDenseResetArray(Mat mat)
1876: {
1877:   PetscFunctionBegin;
1879:   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1880:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1881:   PetscFunctionReturn(PETSC_SUCCESS);
1882: }

1884: /*@
1885:   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1886:   array provided by the user. This is useful to avoid copying an array
1887:   into a matrix.

1889:   Not Collective

1891:   Input Parameters:
1892: + mat   - the matrix
1893: - array - the array in column major order

1895:   Level: developer

1897:   Note:
1898:   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1899:   freed by the user. It will be freed when the matrix is destroyed.

1901: .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1902: @*/
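/*
  Illustrative usage sketch (editorial addition, not part of the PETSc source): handing
  ownership of a freshly allocated buffer to the matrix. Unlike MatDensePlaceArray(), the
  matrix frees this memory when it is destroyed, so the caller must not free it. m and N are
  placeholders matching the layout of A.

    PetscScalar *buf;
    PetscCall(PetscMalloc1((size_t)m * N, &buf));
    PetscCall(MatDenseReplaceArray(A, buf));   // A now owns buf; do not PetscFree() it
*/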
1903: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1904: {
1905:   PetscFunctionBegin;
1907:   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1908:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1909: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1910:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1911: #endif
1912:   PetscFunctionReturn(PETSC_SUCCESS);
1913: }

1915: /*@
1916:   MatCreateDense - Creates a matrix in `MATDENSE` format.

1918:   Collective

1920:   Input Parameters:
1921: + comm - MPI communicator
1922: . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1923: . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1924: . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1925: . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1926: - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1927:    to control all matrix memory allocation.

1929:   Output Parameter:
1930: . A - the matrix

1932:   Level: intermediate

1934:   Notes:
1935:   The dense format is fully compatible with standard Fortran
1936:   storage by columns.

1938:   Although local portions of the matrix are stored in column-major
1939:   order, the matrix is partitioned across MPI ranks by row.

1941:   The data input variable is intended primarily for Fortran programmers
1942:   who wish to allocate their own matrix memory space.  Most users should
1943:   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

1945:   The user MUST specify either the local or global matrix dimensions
1946:   (possibly both).

1948: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1949: @*/
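/*
  Illustrative usage sketch (editorial addition, not part of the PETSc source): a 100 x 50
  distributed dense matrix with PETSc choosing the layout and managing the storage.

    Mat A;
    PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
    PetscCall(MatSetValue(A, 0, 0, 1.0, INSERT_VALUES));   // off-process entries are stashed and moved at assembly
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&A));
*/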
1950: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1951: {
1952:   PetscFunctionBegin;
1953:   PetscCall(MatCreate(comm, A));
1954:   PetscCall(MatSetSizes(*A, m, n, M, N));
1955:   PetscCall(MatSetType(*A, MATDENSE));
1956:   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1957:   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1958:   PetscFunctionReturn(PETSC_SUCCESS);
1959: }

1961: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1962: {
1963:   Mat           mat;
1964:   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

1966:   PetscFunctionBegin;
1967:   *newmat = NULL;
1968:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1969:   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1970:   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1971:   a = (Mat_MPIDense *)mat->data;

1973:   mat->factortype   = A->factortype;
1974:   mat->assembled    = PETSC_TRUE;
1975:   mat->preallocated = PETSC_TRUE;

1977:   mat->insertmode = NOT_SET_VALUES;
1978:   a->donotstash   = oldmat->donotstash;

1980:   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1981:   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

1983:   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

1985:   *newmat = mat;
1986:   PetscFunctionReturn(PETSC_SUCCESS);
1987: }

1989: static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1990: {
1991:   PetscBool isbinary;
1992: #if defined(PETSC_HAVE_HDF5)
1993:   PetscBool ishdf5;
1994: #endif

1996:   PetscFunctionBegin;
1999:   /* force binary viewer to load .info file if it has not yet done so */
2000:   PetscCall(PetscViewerSetUp(viewer));
2001:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2002: #if defined(PETSC_HAVE_HDF5)
2003:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2004: #endif
2005:   if (isbinary) {
2006:     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2007: #if defined(PETSC_HAVE_HDF5)
2008:   } else if (ishdf5) {
2009:     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2010: #endif
2011:   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2012:   PetscFunctionReturn(PETSC_SUCCESS);
2013: }

2015: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2016: {
2017:   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2018:   Mat           a, b;

2020:   PetscFunctionBegin;
2021:   a = matA->A;
2022:   b = matB->A;
2023:   PetscCall(MatEqual(a, b, flag));
2024:   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2025:   PetscFunctionReturn(PETSC_SUCCESS);
2026: }

2028: static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2029: {
2030:   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

2032:   PetscFunctionBegin;
2033:   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2034:   PetscCall(MatDestroy(&atb->atb));
2035:   PetscCall(PetscFree(atb));
2036:   PetscFunctionReturn(PETSC_SUCCESS);
2037: }

2039: static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2040: {
2041:   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

2043:   PetscFunctionBegin;
2044:   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2045:   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2046:   PetscCall(PetscFree(abt));
2047:   PetscFunctionReturn(PETSC_SUCCESS);
2048: }

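/*
  Editorial note (added to this listing): computes C = A^T * B, where A and B share the same
  row layout. Each rank forms its full local contribution A_loc^T * B_loc, packs it by
  destination rank, and an MPI_Reduce_scatter sums the contributions so that each rank ends
  up with the rows of C it owns.
*/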
2050: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2051: {
2052:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2053:   Mat_TransMatMultDense *atb;
2054:   MPI_Comm               comm;
2055:   PetscMPIInt            size, *recvcounts;
2056:   PetscScalar           *carray, *sendbuf;
2057:   const PetscScalar     *atbarray;
2058:   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2059:   const PetscInt        *ranges;

2061:   PetscFunctionBegin;
2062:   MatCheckProduct(C, 3);
2063:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2064:   atb        = (Mat_TransMatMultDense *)C->product->data;
2065:   recvcounts = atb->recvcounts;
2066:   sendbuf    = atb->sendbuf;

2068:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2069:   PetscCallMPI(MPI_Comm_size(comm, &size));

2071:   /* compute atbarray = aseq^T * bseq */
2072:   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

2074:   PetscCall(MatGetOwnershipRanges(C, &ranges));

2076:   /* arrange atbarray into sendbuf */
2077:   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2078:   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2079:   for (proc = 0, k = 0; proc < size; proc++) {
2080:     for (j = 0; j < cN; j++) {
2081:       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2082:     }
2083:   }
2084:   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

2086:   /* sum the atbarray contributions from all ranks into the locally owned rows of C */
2087:   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2088:   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2089:   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2090:   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2091:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2092:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2093:   PetscFunctionReturn(PETSC_SUCCESS);
2094: }

2096: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2097: {
2098:   MPI_Comm               comm;
2099:   PetscMPIInt            size;
2100:   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2101:   Mat_TransMatMultDense *atb;
2102:   PetscBool              cisdense = PETSC_FALSE;
2103:   const PetscInt        *ranges;

2105:   PetscFunctionBegin;
2106:   MatCheckProduct(C, 4);
2107:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2108:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2109:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2110:     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2111:   }

2113:   /* create matrix product C */
2114:   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2115: #if defined(PETSC_HAVE_CUDA)
2116:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2117: #endif
2118: #if defined(PETSC_HAVE_HIP)
2119:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2120: #endif
2121:   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2122:   PetscCall(MatSetUp(C));

2124:   /* create data structure for reusing C */
2125:   PetscCallMPI(MPI_Comm_size(comm, &size));
2126:   PetscCall(PetscNew(&atb));
2127:   cM = C->rmap->N;
2128:   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2129:   PetscCall(MatGetOwnershipRanges(C, &ranges));
2130:   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2131:   C->product->data    = atb;
2132:   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2133:   PetscFunctionReturn(PETSC_SUCCESS);
2134: }

2136: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2137: {
2138:   MPI_Comm               comm;
2139:   PetscMPIInt            i, size;
2140:   PetscInt               maxRows, bufsiz;
2141:   PetscMPIInt            tag;
2142:   PetscInt               alg;
2143:   Mat_MatTransMultDense *abt;
2144:   Mat_Product           *product = C->product;
2145:   PetscBool              flg;

2147:   PetscFunctionBegin;
2148:   MatCheckProduct(C, 4);
2149:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2150:   /* check local size of A and B */
2151:   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

2153:   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2154:   alg = flg ? 0 : 1;

2156:   /* setup matrix product C */
2157:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2158:   PetscCall(MatSetType(C, MATMPIDENSE));
2159:   PetscCall(MatSetUp(C));
2160:   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

2162:   /* create data structure for reusing C */
2163:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2164:   PetscCallMPI(MPI_Comm_size(comm, &size));
2165:   PetscCall(PetscNew(&abt));
2166:   abt->tag = tag;
2167:   abt->alg = alg;
2168:   switch (alg) {
2169:   case 1: /* alg: "cyclic" */
2170:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2171:     bufsiz = A->cmap->N * maxRows;
2172:     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2173:     break;
2174:   default: /* alg: "allgatherv" */
2175:     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2176:     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2177:     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2178:     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2179:     break;
2180:   }

2182:   C->product->data                = abt;
2183:   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2184:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2185:   PetscFunctionReturn(PETSC_SUCCESS);
2186: }

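/*
  Editorial note (added to this listing): the "cyclic" algorithm for C = A * B^T. Each rank
  packs its local rows of B into a column-major block and the blocks are circulated around
  the ranks in a ring with nonblocking sends/receives; at every step the local rows of A are
  multiplied against the block currently held, filling the columns of C that correspond to
  that block's rows of B. This trades the single large allgather of the other variant for
  size - 1 point-to-point exchanges and only two communication buffers.
*/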
2188: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2189: {
2190:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2191:   Mat_MatTransMultDense *abt;
2192:   MPI_Comm               comm;
2193:   PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
2194:   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2195:   PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2196:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2197:   const PetscScalar     *av, *bv;
2198:   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2199:   MPI_Request            reqs[2];
2200:   const PetscInt        *ranges;

2202:   PetscFunctionBegin;
2203:   MatCheckProduct(C, 3);
2204:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2205:   abt = (Mat_MatTransMultDense *)C->product->data;
2206:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2207:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2208:   PetscCallMPI(MPI_Comm_size(comm, &size));
2209:   PetscCall(MatDenseGetArrayRead(a->A, &av));
2210:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2211:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2212:   PetscCall(MatDenseGetLDA(a->A, &i));
2213:   PetscCall(PetscBLASIntCast(i, &alda));
2214:   PetscCall(MatDenseGetLDA(b->A, &i));
2215:   PetscCall(PetscBLASIntCast(i, &blda));
2216:   PetscCall(MatDenseGetLDA(c->A, &i));
2217:   PetscCall(PetscBLASIntCast(i, &clda));
2218:   PetscCall(MatGetOwnershipRanges(B, &ranges));
2219:   bn = B->rmap->n;
2220:   if (blda == bn) {
2221:     sendbuf = (PetscScalar *)bv;
2222:   } else {
2223:     sendbuf = abt->buf[0];
2224:     for (k = 0, i = 0; i < cK; i++) {
2225:       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2226:     }
2227:   }
2228:   if (size > 1) {
2229:     sendto   = (rank + size - 1) % size;
2230:     recvfrom = (rank + size + 1) % size;
2231:   } else {
2232:     sendto = recvfrom = 0;
2233:   }
2234:   PetscCall(PetscBLASIntCast(cK, &ck));
2235:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2236:   recvisfrom = rank;
2237:   for (i = 0; i < size; i++) {
2238:     /* we have finished receiving and sending, so the buffers can be read/modified */
2239:     PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2240:     PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

2242:     if (nextrecvisfrom != rank) {
2243:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2244:       sendsiz = cK * bn;
2245:       recvsiz = cK * nextbn;
2246:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2247:       PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2248:       PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2249:     }

2251:     /* local aseq * sendbuf^T */
2252:     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2253:     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

2255:     if (nextrecvisfrom != rank) {
2256:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2257:       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2258:     }
2259:     bn         = nextbn;
2260:     recvisfrom = nextrecvisfrom;
2261:     sendbuf    = recvbuf;
2262:   }
2263:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2264:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2265:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2266:   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2267:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2268:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2269:   PetscFunctionReturn(PETSC_SUCCESS);
2270: }

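/*
  Editorial note (added to this listing): the "allgatherv" algorithm for C = A * B^T. Each
  rank transposes its local rows of B into a send buffer, an MPI_Allgatherv replicates the
  full B^T on every rank, and a single local GEMM then produces the owned rows of C. This
  uses more memory than the cyclic variant but needs only one collective.
*/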
2272: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2273: {
2274:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2275:   Mat_MatTransMultDense *abt;
2276:   MPI_Comm               comm;
2277:   PetscMPIInt            size, ibn;
2278:   PetscScalar           *cv, *sendbuf, *recvbuf;
2279:   const PetscScalar     *av, *bv;
2280:   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2281:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2282:   PetscBLASInt           cm, cn, ck, alda, clda;

2284:   PetscFunctionBegin;
2285:   MatCheckProduct(C, 3);
2286:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2287:   abt = (Mat_MatTransMultDense *)C->product->data;
2288:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2289:   PetscCallMPI(MPI_Comm_size(comm, &size));
2290:   PetscCall(MatDenseGetArrayRead(a->A, &av));
2291:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2292:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2293:   PetscCall(MatDenseGetLDA(a->A, &i));
2294:   PetscCall(PetscBLASIntCast(i, &alda));
2295:   PetscCall(MatDenseGetLDA(b->A, &blda));
2296:   PetscCall(MatDenseGetLDA(c->A, &i));
2297:   PetscCall(PetscBLASIntCast(i, &clda));
2298:   /* copy transpose of B into buf[0] */
2299:   bn      = B->rmap->n;
2300:   sendbuf = abt->buf[0];
2301:   recvbuf = abt->buf[1];
2302:   for (k = 0, j = 0; j < bn; j++) {
2303:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2304:   }
2305:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2306:   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2307:   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2308:   PetscCall(PetscBLASIntCast(cK, &ck));
2309:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2310:   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2311:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2312:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2313:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2314:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2315:   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2316:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2317:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2318:   PetscFunctionReturn(PETSC_SUCCESS);
2319: }

2321: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2322: {
2323:   Mat_MatTransMultDense *abt;

2325:   PetscFunctionBegin;
2326:   MatCheckProduct(C, 3);
2327:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2328:   abt = (Mat_MatTransMultDense *)C->product->data;
2329:   switch (abt->alg) {
2330:   case 1:
2331:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2332:     break;
2333:   default:
2334:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2335:     break;
2336:   }
2337:   PetscFunctionReturn(PETSC_SUCCESS);
2338: }

2340: static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2341: {
2342:   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

2344:   PetscFunctionBegin;
2345:   PetscCall(MatDestroy(&ab->Ce));
2346:   PetscCall(MatDestroy(&ab->Ae));
2347:   PetscCall(MatDestroy(&ab->Be));
2348:   PetscCall(PetscFree(ab));
2349:   PetscFunctionReturn(PETSC_SUCCESS);
2350: }

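/*
  Editorial note (added to this listing): numeric phase of C = A * B. With the "elemental"
  algorithm both operands are converted to Elemental matrices and multiplied there. With the
  default "petsc" algorithm all of B is gathered onto every rank (one PetscSF broadcast per
  column into the sequential matrix Be) and the product is completed with a local
  sequential-dense multiply.
*/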
2352: static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2353: {
2354:   Mat_MatMultDense *ab;
2355:   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

2357:   PetscFunctionBegin;
2358:   MatCheckProduct(C, 3);
2359:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2360:   ab = (Mat_MatMultDense *)C->product->data;
2361:   if (ab->Ae && ab->Ce) {
2362: #if PetscDefined(HAVE_ELEMENTAL)
2363:     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2364:     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2365:     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2366:     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2367: #else
2368:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2369: #endif
2370:   } else {
2371:     const PetscScalar *read;
2372:     PetscScalar       *write;
2373:     PetscInt           lda;

2375:     PetscCall(MatDenseGetLDA(B, &lda));
2376:     PetscCall(MatDenseGetArrayRead(B, &read));
2377:     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2378:     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
2379:     for (PetscInt i = 0; i < C->cmap->N; ++i) {
2380:       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2381:       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2382:     }
2383:     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2384:     PetscCall(MatDenseRestoreArrayRead(B, &read));
2385:     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2386:   }
2387:   PetscFunctionReturn(PETSC_SUCCESS);
2388: }

2390: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2391: {
2392:   Mat_Product      *product = C->product;
2393:   PetscInt          alg;
2394:   Mat_MatMultDense *ab;
2395:   PetscBool         flg;

2397:   PetscFunctionBegin;
2398:   MatCheckProduct(C, 4);
2399:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2400:   /* check local size of A and B */
2401:   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2402:              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

2404:   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2405:   alg = flg ? 0 : 1;

2407:   /* setup C */
2408:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2409:   PetscCall(MatSetType(C, MATMPIDENSE));
2410:   PetscCall(MatSetUp(C));

2412:   /* create data structure holding the intermediate matrices, reused across numeric phases */
2413:   PetscCall(PetscNew(&ab));

2415:   switch (alg) {
2416:   case 1: /* alg: "elemental" */
2417: #if PetscDefined(HAVE_ELEMENTAL)
2418:     /* create elemental matrices Ae and Be */
2419:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2420:     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2421:     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2422:     PetscCall(MatSetUp(ab->Ae));
2423:     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

2425:     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2426:     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2427:     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2428:     PetscCall(MatSetUp(ab->Be));
2429:     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

2431:     /* compute symbolic Ce = Ae*Be */
2432:     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2433:     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2434: #else
2435:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2436: #endif
2437:     break;
2438:   default: /* alg: "petsc" */
2439:     ab->Ae = NULL;
2440:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2441:     ab->Ce = NULL;
2442:     break;
2443:   }

2445:   C->product->data       = ab;
2446:   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2447:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2448:   PetscFunctionReturn(PETSC_SUCCESS);
2449: }
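The symbolic phase only sizes C, selects the algorithm, and allocates the reuse context ab; the entries of C are produced by the numeric routine it installs. A hedged sketch of driving the two phases explicitly through the MatProduct interface (a single MatMatMult() with MAT_INITIAL_MATRIX goes through the same path), again assuming assembled MATMPIDENSE matrices A and B:

  Mat C;
  PetscCall(MatProductCreate(A, B, NULL, &C));
  PetscCall(MatProductSetType(C, MATPRODUCT_AB));
  PetscCall(MatProductSetAlgorithm(C, "petsc"));  /* "elemental" is also accepted with an Elemental build */
  PetscCall(MatProductSetFromOptions(C));         /* installs MatMatMultSymbolic_MPIDense_MPIDense */
  PetscCall(MatProductSymbolic(C));               /* allocates ab->Be (or Ae/Be/Ce) */
  PetscCall(MatProductNumeric(C));                /* fills the entries of C */
  PetscCall(MatDestroy(&C));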

2451: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2452: {
2453:   Mat_Product *product     = C->product;
2454:   const char  *algTypes[2] = {"petsc", "elemental"};
2455:   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2456:   PetscBool    flg = PETSC_FALSE;

2458:   PetscFunctionBegin;
2459:   /* Set default algorithm */
2460:   alg = 0; /* default is petsc */
2461:   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2462:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2464:   /* Get runtime option */
2465:   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2466:   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2467:   PetscOptionsEnd();
2468:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2470:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2471:   C->ops->productsymbolic = MatProductSymbolic_AB;
2472:   PetscFunctionReturn(PETSC_SUCCESS);
2473: }
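The same choice is exposed at run time through the option registered above. A hedged sketch of requesting the Elemental algorithm programmatically (the executable name in the comment is hypothetical, and PETSc must have been configured with Elemental for the second value to be available):

  PetscCall(MatProductSetType(C, MATPRODUCT_AB));
  PetscCall(MatProductSetAlgorithm(C, "elemental")); /* equivalent to running ./app -mat_product_algorithm elemental */
  PetscCall(MatProductSetFromOptions(C));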

2475: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2476: {
2477:   Mat_Product *product = C->product;
2478:   Mat          A = product->A, B = product->B;

2480:   PetscFunctionBegin;
2481:   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2482:              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2483:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2484:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2485:   PetscFunctionReturn(PETSC_SUCCESS);
2486: }
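A hedged usage sketch for this AtB path: forming C = A^T B with MATMPIDENSE operands through the convenience wrapper, which requires A and B to share the same parallel row layout, as checked above:

  Mat C;
  PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
  /* ... use C ... */
  PetscCall(MatDestroy(&C));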

2488: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2489: {
2490:   Mat_Product *product     = C->product;
2491:   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2492:   PetscInt     alg, nalg = 2;
2493:   PetscBool    flg = PETSC_FALSE;

2495:   PetscFunctionBegin;
2496:   /* Set default algorithm */
2497:   alg = 0; /* default is allgatherv */
2498:   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2499:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2501:   /* Get runtime option */
2502:   if (product->api_user) {
2503:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2504:     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2505:     PetscOptionsEnd();
2506:   } else {
2507:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2508:     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2509:     PetscOptionsEnd();
2510:   }
2511:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

2513:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2514:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2515:   PetscFunctionReturn(PETSC_SUCCESS);
2516: }
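A hedged usage sketch for this ABt path: forming C = A B^T through the convenience wrapper, with "allgatherv" as the default algorithm registered above; when called this way (product->api_user is true), the run-time option consulted is -matmattransmult_mpidense_mpidense_via rather than -mat_product_algorithm:

  Mat C;
  PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
  /* ... use C ... */
  PetscCall(MatDestroy(&C));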

2518: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2519: {
2520:   Mat_Product *product = C->product;

2522:   PetscFunctionBegin;
2523:   switch (product->type) {
2524:   case MATPRODUCT_AB:
2525:     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2526:     break;
2527:   case MATPRODUCT_AtB:
2528:     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2529:     break;
2530:   case MATPRODUCT_ABt:
2531:     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2532:     break;
2533:   default:
2534:     break;
2535:   }
2536:   PetscFunctionReturn(PETSC_SUCCESS);
2537: }