/* Actual source code: mpidense.c */

  1: /*
  2:    Basic functions for basic parallel dense matrices.
  3:    Portions of this code are under:
  4:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  5: */

  7: #include <../src/mat/impls/dense/mpi/mpidense.h>
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9: #include <petscblaslapack.h>
 10: #include <petsc/private/vecimpl.h>
 11: #include <petsc/private/deviceimpl.h>
 12: #include <petsc/private/sfimpl.h>

 14: /*@
 15:   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
 16:   matrix that represents the operator. For sequential matrices it returns itself.

 18:   Input Parameter:
 19: . A - the sequential or MPI `MATDENSE` matrix

 21:   Output Parameter:
 22: . B - the inner matrix

 24:   Level: intermediate

 26: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 27: @*/
 28: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 29: {
 30:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 31:   PetscBool     flg;

 33:   PetscFunctionBegin;
 35:   PetscAssertPointer(B, 2);
 36:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
 37:   if (flg) *B = mat->A;
 38:   else {
 39:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
 40:     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
 41:     *B = A;
 42:   }
 43:   PetscFunctionReturn(PETSC_SUCCESS);
 44: }

 46: static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 47: {
 48:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 49:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 51:   PetscFunctionBegin;
 52:   PetscCall(MatCopy(Amat->A, Bmat->A, s));
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 57: {
 58:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 59:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 60:   PetscScalar  *v;

 62:   PetscFunctionBegin;
 63:   PetscCall(MatDenseGetArray(mat->A, &v));
 64:   PetscCall(MatDenseGetLDA(mat->A, &lda));
 65:   rend2 = PetscMin(rend, A->cmap->N);
 66:   if (rend2 > rstart) {
 67:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 68:     PetscCall(PetscLogFlops(rend2 - rstart));
 69:   }
 70:   PetscCall(MatDenseRestoreArray(mat->A, &v));
 71:   PetscFunctionReturn(PETSC_SUCCESS);
 72: }

 74: static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 75: {
 76:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 77:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 79:   PetscFunctionBegin;
 80:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 81:   lrow = row - rstart;
 82:   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }

 86: static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 87: {
 88:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 89:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 91:   PetscFunctionBegin;
 92:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
 93:   lrow = row - rstart;
 94:   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 95:   PetscFunctionReturn(PETSC_SUCCESS);
 96: }

/* Return the on-process square diagonal block of A as a sequential dense matrix.
   The block is created on first use, cached on A under the key "DiagonalBlock",
   and aliases the storage of A's local dense block (no copy). */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Alias the diagonal block: the owned columns start at column rstart of the
       local array. NOTE(review): the offset m*rstart assumes the local block's
       leading dimension equals m -- confirm for matrices with a nondefault LDA. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* cache on A (the compose takes a reference), then drop our local reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values into the parallel dense matrix. Locally owned rows go
   directly into the local dense block; off-process rows are buffered in the
   matrix stash (unless donotstash is set) and communicated during assembly. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-oriented input: row i of v starts at offset i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are ignored */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

166: static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167: {
168:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

171:   PetscFunctionBegin;
172:   for (i = 0; i < m; i++) {
173:     if (idxm[i] < 0) continue; /* negative row */
174:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175:     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
176:     row = idxm[i] - rstart;
177:     for (j = 0; j < n; j++) {
178:       if (idxn[j] < 0) continue; /* negative column */
179:       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180:       PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181:     }
182:   }
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
187: {
188:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

190:   PetscFunctionBegin;
191:   PetscCall(MatDenseGetLDA(a->A, lda));
192:   PetscFunctionReturn(PETSC_SUCCESS);
193: }

/* Set the leading dimension of the local dense block. If the block does not
   exist yet (no preallocation has happened), create it first so the LDA can
   be fixed before its storage is allocated. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* local block is m x N: each process stores all columns of its rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    /* match the device flavor of the parallel matrix, if any */
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

222: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
223: {
224:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

226:   PetscFunctionBegin;
227:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
228:   PetscCall(MatDenseGetArray(a->A, array));
229:   PetscFunctionReturn(PETSC_SUCCESS);
230: }

232: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
233: {
234:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

236:   PetscFunctionBegin;
237:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
238:   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }

242: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
243: {
244:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

246:   PetscFunctionBegin;
247:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
248:   PetscCall(MatDenseGetArrayWrite(a->A, array));
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }

252: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
253: {
254:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

256:   PetscFunctionBegin;
257:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
258:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
259:   PetscCall(MatDensePlaceArray(a->A, array));
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }

263: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
264: {
265:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

267:   PetscFunctionBegin;
268:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
269:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
270:   PetscCall(MatDenseResetArray(a->A));
271:   PetscFunctionReturn(PETSC_SUCCESS);
272: }

274: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
275: {
276:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

278:   PetscFunctionBegin;
279:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
280:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
281:   PetscCall(MatDenseReplaceArray(a->A, array));
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

/* Extract the submatrix A[isrow, iscol] into a new (or reused) parallel dense
   matrix. The column index set is gathered onto every process; the result
   keeps the same row partitioning as A (no redistribution). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* column-major copy: source column icol[i] begins at v + lda*icol[i]; the
     destination is written contiguously. NOTE(review): this assumes the
     destination block's LDA equals nrows -- true for default preallocation,
     but unchecked for a reused matrix. */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

352: static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
353: {
354:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

356:   PetscFunctionBegin;
357:   PetscCall(MatDenseRestoreArray(a->A, array));
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }

361: static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
362: {
363:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

365:   PetscFunctionBegin;
366:   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
367:   PetscFunctionReturn(PETSC_SUCCESS);
368: }

370: static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
371: {
372:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

374:   PetscFunctionBegin;
375:   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
376:   PetscFunctionReturn(PETSC_SUCCESS);
377: }

/* Begin assembly: start communicating stashed off-process entries to their
   owners. The received values are inserted in MatAssemblyEnd_MPIDense(). */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing to communicate when stashing is disabled or no off-process entries were set */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain the stash of off-process entries, inserting each run
   of values that shares a row with one MatSetValues call, then assemble the
   local dense block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* all messages processed */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

427: static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
428: {
429:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

431:   PetscFunctionBegin;
432:   PetscCall(MatZeroEntries(l->A));
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }

/* Zero the given global rows of the matrix. If x and b are provided, fix the
   right-hand side to b[r] = diag*x[r] for each zeroed row r. When diag is
   nonzero, the diagonal is set in a second pass via MatDiagonalSet(). */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows with a zero diagonal first */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this inserts diag on EVERY diagonal entry, not only on the
       zeroed rows -- confirm that is the intended semantics for this type */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

468: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
470: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
471: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* Parallel mat-vec restricted to columns [c_start, c_end): replicate the
   distributed input vector into the local work vector lvec with a star-forest
   broadcast, then invoke the sequential column-range kernel on the local block. */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  /* memtype-aware broadcast so host and device vectors are both handled */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x: replicate the distributed input vector into the local work vector
   lvec with a star-forest broadcast, then multiply by the local dense block. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  /* memtype-aware broadcast so host and device vectors are both handled */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local product: yy = (local block) * lvec */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A[:, c_start:c_end)-restricted product: replicate x into lvec via an
   SF broadcast, then delegate to the sequential column-range multiply-add. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x: replicate the distributed input vector into lvec with an SF
   broadcast, then run the local block's multiply-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local product: zz = yy + (local block) * lvec */
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^H * x restricted to output entries [c_start, c_end): run the sequential
   column-range kernel into the local work vector lvec, then sum-reduce the
   partial results into the distributed output with the star forest. Only the
   locally owned part of y that intersects the requested range is zeroed first. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecZeroEntries(a->lvec));
  /* intersect the requested column range with this process's owned rows of yy */
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      /* device memory: register the full local array, then zero the subrange on device */
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  /* accumulate each process's partial result into the owned entries of yy */
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for y = A^T * x (herm == PETSC_FALSE) and y = A^H * x
   (herm == PETSC_TRUE): apply the local (Hermitian-)transpose product into the
   work vector lvec, then sum-reduce the partial results into y via the SF. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* y starts at zero because each process only contributes a partial sum */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^H * x restricted to the column range [c_start, c_end): copy y into
   z, compute the local restricted Hermitian-transpose product into lvec, then
   sum-reduce the partial results into z via the star forest. */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  /* lvec must be zero outside the computed range before the reduction */
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for z = y + A^T * x (herm == PETSC_FALSE) and z = y + A^H * x
   (herm == PETSC_TRUE): copy y into z, apply the local (Hermitian-)transpose
   product into lvec, then sum-reduce the partial results into z via the SF. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the communication pattern (SF) and lvec lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T * x: thin dispatcher into the shared transpose kernel */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T * x: thin dispatcher into the shared transpose-add kernel */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^H * x: thin dispatcher into the shared transpose kernel (Hermitian mode) */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^H * x: thin dispatcher into the shared transpose-add kernel (Hermitian mode) */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

673: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
674: {
675:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
676:   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
677:   PetscScalar       *x;
678:   const PetscScalar *av;

680:   PetscFunctionBegin;
681:   PetscCall(VecGetArray(v, &x));
682:   PetscCall(VecGetSize(v, &ng));
683:   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
684:   PetscCall(VecGetLocalSize(v, &nl));
685:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
686:   radd = A->rmap->rstart * m;
687:   PetscCall(MatDenseGetArrayRead(a->A, &av));
688:   PetscCall(MatDenseGetLDA(a->A, &lda));
689:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
690:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
691:   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
692:   PetscCall(VecRestoreArray(v, &x));
693:   PetscFunctionReturn(PETSC_SUCCESS);
694: }

/*
   Destroy an MPIDENSE matrix: release the local (sequential) block, the
   communication objects used for MatMult, and the cached column vector and
   submatrix, then detach every composed method so the object can be re-typed.

   Fails (PETSC_ERR_ORDER) if a column vector obtained with
   MatDenseGetColumnVec() or a submatrix obtained with MatDenseGetSubMatrix()
   has not been restored yet, since those borrow storage owned by this matrix.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the local dense block and the MatMult scatter machinery */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  /* cached column vector / column submatrix handed out by MatDenseGetColumnVec/GetSubMatrix */
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove all composed methods installed by MatCreate_MPIDense and friends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

795: #include <petscdraw.h>
796: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
797: {
798:   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
799:   PetscMPIInt       rank;
800:   PetscViewerType   vtype;
801:   PetscBool         isascii, isdraw;
802:   PetscViewer       sviewer;
803:   PetscViewerFormat format;

805:   PetscFunctionBegin;
806:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
807:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
808:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
809:   if (isascii) {
810:     PetscCall(PetscViewerGetType(viewer, &vtype));
811:     PetscCall(PetscViewerGetFormat(viewer, &format));
812:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
813:       MatInfo info;
814:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
815:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
816:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
817:                                                    (PetscInt)info.memory));
818:       PetscCall(PetscViewerFlush(viewer));
819:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
820:       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
821:       PetscFunctionReturn(PETSC_SUCCESS);
822:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
823:       PetscFunctionReturn(PETSC_SUCCESS);
824:     }
825:   } else if (isdraw) {
826:     PetscDraw draw;
827:     PetscBool isnull;

829:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
830:     PetscCall(PetscDrawIsNull(draw, &isnull));
831:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
832:   }

834:   {
835:     /* assemble the entire matrix onto first processor. */
836:     Mat          A;
837:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
838:     PetscInt    *cols;
839:     PetscScalar *vals;

841:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
842:     if (rank == 0) {
843:       PetscCall(MatSetSizes(A, M, N, M, N));
844:     } else {
845:       PetscCall(MatSetSizes(A, 0, 0, M, N));
846:     }
847:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
848:     PetscCall(MatSetType(A, MATMPIDENSE));
849:     PetscCall(MatMPIDenseSetPreallocation(A, NULL));

851:     /* Copy the matrix ... This isn't the most efficient means,
852:        but it's quick for now */
853:     A->insertmode = INSERT_VALUES;

855:     row = mat->rmap->rstart;
856:     m   = mdn->A->rmap->n;
857:     for (i = 0; i < m; i++) {
858:       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
859:       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
860:       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
861:       row++;
862:     }

864:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
865:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
866:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
867:     if (rank == 0) {
868:       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
869:       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
870:     }
871:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
872:     PetscCall(MatDestroy(&A));
873:   }
874:   PetscFunctionReturn(PETSC_SUCCESS);
875: }

877: static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
878: {
879:   PetscBool isascii, isbinary, isdraw, issocket;

881:   PetscFunctionBegin;
882:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
883:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
884:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
885:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

887:   if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
888:   else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
889:   PetscFunctionReturn(PETSC_SUCCESS);
890: }

892: static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
893: {
894:   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
895:   Mat            mdn = mat->A;
896:   PetscLogDouble isend[5], irecv[5];

898:   PetscFunctionBegin;
899:   info->block_size = 1.0;

901:   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

903:   isend[0] = info->nz_used;
904:   isend[1] = info->nz_allocated;
905:   isend[2] = info->nz_unneeded;
906:   isend[3] = info->memory;
907:   isend[4] = info->mallocs;
908:   if (flag == MAT_LOCAL) {
909:     info->nz_used      = isend[0];
910:     info->nz_allocated = isend[1];
911:     info->nz_unneeded  = isend[2];
912:     info->memory       = isend[3];
913:     info->mallocs      = isend[4];
914:   } else if (flag == MAT_GLOBAL_MAX) {
915:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

917:     info->nz_used      = irecv[0];
918:     info->nz_allocated = irecv[1];
919:     info->nz_unneeded  = irecv[2];
920:     info->memory       = irecv[3];
921:     info->mallocs      = irecv[4];
922:   } else if (flag == MAT_GLOBAL_SUM) {
923:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

925:     info->nz_used      = irecv[0];
926:     info->nz_allocated = irecv[1];
927:     info->nz_unneeded  = irecv[2];
928:     info->memory       = irecv[3];
929:     info->mallocs      = irecv[4];
930:   }
931:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
932:   info->fill_ratio_needed = 0;
933:   info->factor_mallocs    = 0;
934:   PetscFunctionReturn(PETSC_SUCCESS);
935: }

937: static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
938: {
939:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

941:   PetscFunctionBegin;
942:   switch (op) {
943:   case MAT_NEW_NONZERO_LOCATIONS:
944:   case MAT_NEW_NONZERO_LOCATION_ERR:
945:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
946:     MatCheckPreallocated(A, 1);
947:     PetscCall(MatSetOption(a->A, op, flg));
948:     break;
949:   case MAT_ROW_ORIENTED:
950:     MatCheckPreallocated(A, 1);
951:     a->roworiented = flg;
952:     PetscCall(MatSetOption(a->A, op, flg));
953:     break;
954:   case MAT_IGNORE_OFF_PROC_ENTRIES:
955:     a->donotstash = flg;
956:     break;
957:   case MAT_SYMMETRIC:
958:   case MAT_STRUCTURALLY_SYMMETRIC:
959:   case MAT_HERMITIAN:
960:   case MAT_SYMMETRY_ETERNAL:
961:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
962:   case MAT_SPD:
963:   case MAT_SPD_ETERNAL:
964:     /* if the diagonal matrix is square it inherits some of the properties above */
965:     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
966:     break;
967:   default:
968:     break;
969:   }
970:   PetscFunctionReturn(PETSC_SUCCESS);
971: }

/*
   Scale A on the left and/or right by diagonal matrices: A = diag(ll) * A * diag(rr).
   Either vector may be NULL to skip that side.

   ll is distributed like the rows of A, so left scaling is purely local.
   rr is distributed like the columns of A, but every process stores all N
   columns of its local rows, so the rr entries must first be broadcast to all
   processes through the matrix's PetscSF scatter context (built on demand).
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row i of the column-major local array is the strided sequence vv[i], vv[i+lda], ... */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the scatter context, then replicate rr onto every process */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column j occupies the contiguous range vv[j*lda .. j*lda+m-1] */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

1022: static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1023: {
1024:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1025:   PetscInt           i, j;
1026:   PetscMPIInt        size;
1027:   PetscReal          sum = 0.0;
1028:   const PetscScalar *av, *v;

1030:   PetscFunctionBegin;
1031:   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1032:   v = av;
1033:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1034:   if (size == 1) {
1035:     PetscCall(MatNorm(mdn->A, type, nrm));
1036:   } else {
1037:     if (type == NORM_FROBENIUS) {
1038:       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1039:         sum += PetscRealPart(PetscConj(*v) * (*v));
1040:         v++;
1041:       }
1042:       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1043:       *nrm = PetscSqrtReal(*nrm);
1044:       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1045:     } else if (type == NORM_1) {
1046:       PetscReal *tmp;

1048:       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1049:       *nrm = 0.0;
1050:       v    = av;
1051:       for (j = 0; j < mdn->A->cmap->n; j++) {
1052:         for (i = 0; i < mdn->A->rmap->n; i++) {
1053:           tmp[j] += PetscAbsScalar(*v);
1054:           v++;
1055:         }
1056:       }
1057:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1058:       for (j = 0; j < A->cmap->N; j++) {
1059:         if (tmp[j] > *nrm) *nrm = tmp[j];
1060:       }
1061:       PetscCall(PetscFree(tmp));
1062:       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1063:     } else if (type == NORM_INFINITY) { /* max row norm */
1064:       PetscCall(MatNorm(mdn->A, type, nrm));
1065:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1066:     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1067:   }
1068:   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1069:   PetscFunctionReturn(PETSC_SUCCESS);
1070: }

/*
   Form the transpose of an MPIDENSE matrix.

   For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a new N x M matrix B is
   created; for MAT_REUSE_MATRIX the caller-supplied *matout is filled. The
   transpose is built row-block by row-block with MatSetValues(): local row
   (rstart+i) of A becomes column (rstart+i) of B, so entries are stashed and
   communicated during assembly. For MAT_INPLACE_MATRIX, B is merged back into
   A's header so the caller's Mat handle is transposed in place.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  /* the local block holds all N columns of the local rows; n == N here */
  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of A, which become column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of A (contiguous in the column-major array) becomes global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold B into A's header, destroying the old contents */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

1111: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1112: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

1114: static PetscErrorCode MatSetUp_MPIDense(Mat A)
1115: {
1116:   PetscFunctionBegin;
1117:   PetscCall(PetscLayoutSetUp(A->rmap));
1118:   PetscCall(PetscLayoutSetUp(A->cmap));
1119:   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1120:   PetscFunctionReturn(PETSC_SUCCESS);
1121: }

1123: static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1124: {
1125:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

1127:   PetscFunctionBegin;
1128:   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1129:   PetscFunctionReturn(PETSC_SUCCESS);
1130: }

1132: static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1133: {
1134:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

1136:   PetscFunctionBegin;
1137:   PetscCall(MatConjugate(a->A));
1138:   PetscFunctionReturn(PETSC_SUCCESS);
1139: }

1141: static PetscErrorCode MatRealPart_MPIDense(Mat A)
1142: {
1143:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1145:   PetscFunctionBegin;
1146:   PetscCall(MatRealPart(a->A));
1147:   PetscFunctionReturn(PETSC_SUCCESS);
1148: }

1150: static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1151: {
1152:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1154:   PetscFunctionBegin;
1155:   PetscCall(MatImaginaryPart(a->A));
1156:   PetscFunctionReturn(PETSC_SUCCESS);
1157: }

1159: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1160: {
1161:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1163:   PetscFunctionBegin;
1164:   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1165:   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1166:   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1167:   PetscFunctionReturn(PETSC_SUCCESS);
1168: }

1170: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/*
   Compute a per-column reduction (norm, sum, or mean) of an MPIDENSE matrix.

   The local block computes per-column partial results; these are then combined
   across the communicator (MPI_MAX for the infinity norm, MPI_SUM otherwise)
   and post-processed: squared 2-norm partials are summed then square-rooted,
   and mean reductions are computed as global sums divided by the global row
   count m.

   NOTE(review): `type` is compared against both REDUCTION_* and NORM_* enum
   constants; this relies on the two enumerations occupying disjoint integer
   values — confirm against petscmat.h if either enum changes.
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* means are computed as sums here and divided by m after the global reduction */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  /* square the local 2-norms so they can be combined additively */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

1202: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1203: {
1204:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1206:   PetscFunctionBegin;
1207:   PetscCall(MatSetRandom(d->A, rctx));
1208: #if defined(PETSC_HAVE_DEVICE)
1209:   x->offloadmask = d->A->offloadmask;
1210: #endif
1211:   PetscFunctionReturn(PETSC_SUCCESS);
1212: }

1214: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1215: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1216: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1217: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1218: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1219: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1220: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/*
   Operation dispatch table for MATMPIDENSE. Entries are POSITIONAL — each
   slot corresponds to a fixed index in struct _MatOps (see matimpl.h), as
   indicated by the interleaved index comments; never reorder them. NULL means
   the operation is unsupported or handled by generic MatOps fallbacks.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/ NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*114*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatADot_Default,
                                       MatANorm_Default,
                                       NULL,
                                       NULL};

/*
  MatMPIDenseSetPreallocation_MPIDense - Allocates (or adopts) storage for the on-process block of an MPIDENSE matrix.

  Each rank stores all N global columns of its local rows in a single sequential
  dense matrix a->A of size (rmap->n) x (cmap->N). If `data` is non-NULL the
  caller-provided column-major array is used instead of PETSc-allocated memory.
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE; /* inner type; overridden below for device matrices */

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* inner matrix holds the full set of global columns for the local rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* match the inner sequential type to the outer (possibly device) type */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* outer offload state mirrors the inner matrix, which owns the actual data */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvert_MPIAIJ_MPIDense - Converts a parallel sparse (MPIAIJ) matrix to parallel dense (MPIDENSE).

  Strategy: gather each rank's rows into one sequential AIJ matrix, densify it
  locally, then concatenate the per-rank dense blocks into the parallel result.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C = local rows of A as one sequential AIJ matrix (all global columns) */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  /* reuse the caller's matrix only for MAT_REUSE_MATRIX; INPLACE builds a fresh one then swaps */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

1422: static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1423: {
1424:   Mat B, C;

1426:   PetscFunctionBegin;
1427:   PetscCall(MatDenseGetLocalMatrix(A, &C));
1428:   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1429:   if (reuse == MAT_REUSE_MATRIX) {
1430:     C = *newmat;
1431:   } else C = NULL;
1432:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1433:   PetscCall(MatDestroy(&B));
1434:   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1435:   else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1436:   PetscFunctionReturn(PETSC_SUCCESS);
1437: }

#if defined(PETSC_HAVE_ELEMENTAL)
/*
  MatConvert_MPIDense_Elemental - Converts an MPIDENSE matrix to Elemental format
  by inserting each rank's local rows (all global columns) into the Elemental matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* entries are accumulated with ADD_VALUES below, so start from zero */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the local dense array is column-major */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index sets: all global columns, and this rank's contiguous row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous storage (lda > m): insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

1484: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1485: {
1486:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1488:   PetscFunctionBegin;
1489:   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1490:   PetscFunctionReturn(PETSC_SUCCESS);
1491: }

1493: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1494: {
1495:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1497:   PetscFunctionBegin;
1498:   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1499:   PetscFunctionReturn(PETSC_SUCCESS);
1500: }

1502: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1503: {
1504:   Mat_MPIDense *mat;
1505:   PetscInt      m, nloc, N;

1507:   PetscFunctionBegin;
1508:   PetscCall(MatGetSize(inmat, &m, &N));
1509:   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1510:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1511:     PetscInt sum;

1513:     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1514:     /* Check sum(n) = N */
1515:     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1516:     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

1518:     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1519:     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1520:   }

1522:   /* numeric phase */
1523:   mat = (Mat_MPIDense *)(*outmat)->data;
1524:   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1525:   PetscFunctionReturn(PETSC_SUCCESS);
1526: }

/*
  MatDenseGetColumnVec_MPIDense - Exposes column `col` as a Vec with read/write access.

  The returned vector is a cached internal Vec whose array is placed directly on
  the column's storage inside the local dense block; only one column vector may
  be outstanding at a time (enforced via a->vecinuse).
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored as col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

1545: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1546: {
1547:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1549:   PetscFunctionBegin;
1550:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1551:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1552:   VecCheckAssembled(a->cvec);
1553:   a->vecinuse = 0;
1554:   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1555:   PetscCall(VecResetArray(a->cvec));
1556:   if (v) *v = NULL;
1557:   PetscFunctionReturn(PETSC_SUCCESS);
1558: }

/*
  MatDenseGetColumnVecRead_MPIDense - Exposes column `col` as a read-only Vec.

  Same mechanism as MatDenseGetColumnVec_MPIDense(), but the array is acquired
  read-only and the vector is read-locked until restored.
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored as col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseRestoreColumnVecRead_MPIDense - Returns the read-only column Vec handed
  out by MatDenseGetColumnVecRead_MPIDense(), releasing the read lock and the array.
*/
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseGetColumnVecWrite_MPIDense - Exposes column `col` as a write-only Vec.

  Same mechanism as MatDenseGetColumnVec_MPIDense(), but the array is acquired
  write-only (existing values need not be valid on the device/host copy in use).
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored as col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseRestoreColumnVecWrite_MPIDense - Returns the write-only column Vec handed
  out by MatDenseGetColumnVecWrite_MPIDense() and releases the underlying array.
*/
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseGetSubMatrix_MPIDense - Exposes the contiguous global block
  [rbegin,rend) x [cbegin,cend) of A as a matrix sharing A's storage.

  A single cached submatrix a->cmat is reused across calls; its layouts are
  rebuilt only when the requested row/column window changes. Only one submatrix
  may be outstanding at a time (tracked via a->matinuse).
*/
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global window against this rank's ownership range (local indices) */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* a full-height (or full-width) window can share A's layout by reference */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    /* remember the window so subsequent calls can detect changes */
    c             = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    /* row window changed: rebuild the row layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      /* column layout changed: rebuild it and drop the now-stale mat-vec scatter */
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* rows are local indices into a->A, columns are global (the local block stores all N columns) */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored as cbegin+1 so 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseRestoreSubMatrix_MPIDense - Returns the submatrix handed out by
  MatDenseGetSubMatrix_MPIDense(), releasing the inner sequential submatrix
  and clearing the in-use flag. The cached a->cmat itself is kept for reuse.
*/
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the submatrix may have moved data between host and device; resync A's mask */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

1718: /*MC
1719:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1721:    Options Database Key:
1722: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

1724:   Level: beginner

1726: .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1727: M*/
/*
  MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the per-matrix
  data structure, installs the function table, and registers all composed methods
  (dense array/column/submatrix access, conversions, and matrix products).
*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array/LDA access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products with (device) AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

1801: /*MC
1802:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1804:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1805:    and `MATMPIDENSE` otherwise.

1807:    Options Database Key:
1808: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

1810:   Level: beginner

1812: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1813: M*/

1815: /*@
1816:   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1818:   Collective

1820:   Input Parameters:
1821: + B    - the matrix
1822: - data - optional location of matrix data.  Set to `NULL` for PETSc
1823:          to control all matrix memory allocation.

1825:   Level: intermediate

1827:   Notes:
1828:   The dense format is fully compatible with standard Fortran
1829:   storage by columns.

1831:   The data input variable is intended primarily for Fortran programmers
1832:   who wish to allocate their own matrix memory space.  Most users should
1833:   set `data` to `NULL`.

1835: .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1836: @*/
1837: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1838: {
1839:   PetscFunctionBegin;
1841:   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1842:   PetscFunctionReturn(PETSC_SUCCESS);
1843: }

1845: /*@
1846:   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1847:   array provided by the user. This is useful to avoid copying an array
1848:   into a matrix

1850:   Not Collective

1852:   Input Parameters:
1853: + mat   - the matrix
1854: - array - the array in column major order

1856:   Level: developer

1858:   Note:
1859:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

1861:   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1862:   freed when the matrix is destroyed.

1864: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1865:           `MatDenseReplaceArray()`
1866: @*/
1867: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1868: {
1869:   PetscFunctionBegin;
1871:   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1872:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1873: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1874:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1875: #endif
1876:   PetscFunctionReturn(PETSC_SUCCESS);
1877: }

1879: /*@
1880:   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

1882:   Not Collective

1884:   Input Parameter:
1885: . mat - the matrix

1887:   Level: developer

1889:   Note:
1890:   You can only call this after a call to `MatDensePlaceArray()`

1892: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1893: @*/
1894: PetscErrorCode MatDenseResetArray(Mat mat)
1895: {
1896:   PetscFunctionBegin;
1898:   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1899:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1900:   PetscFunctionReturn(PETSC_SUCCESS);
1901: }

1903: /*@
1904:   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1905:   array provided by the user. This is useful to avoid copying an array
1906:   into a matrix

1908:   Not Collective

1910:   Input Parameters:
1911: + mat   - the matrix
1912: - array - the array in column major order

1914:   Level: developer

1916:   Note:
1917:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

1919:   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1920:   freed by the user. It will be freed when the matrix is destroyed.

1922: .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1923: @*/
1924: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1925: {
1926:   PetscFunctionBegin;
1928:   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1929:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1930: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1931:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1932: #endif
1933:   PetscFunctionReturn(PETSC_SUCCESS);
1934: }

1936: /*@
1937:   MatCreateDense - Creates a matrix in `MATDENSE` format.

1939:   Collective

1941:   Input Parameters:
1942: + comm - MPI communicator
1943: . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1944: . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1945: . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1946: . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1947: - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1948:    to control all matrix memory allocation.

1950:   Output Parameter:
1951: . A - the matrix

1953:   Level: intermediate

1955:   Notes:
1956:   The dense format is fully compatible with standard Fortran
1957:   storage by columns.

1959:   Although local portions of the matrix are stored in column-major
1960:   order, the matrix is partitioned across MPI ranks by row.

1962:   The data input variable is intended primarily for Fortran programmers
1963:   who wish to allocate their own matrix memory space.  Most users should
1964:   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).

1966:   The user MUST specify either the local or global matrix dimensions
1967:   (possibly both).

1969: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1970: @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation routines are try-methods; presumably only the one matching
     the resolved type (seq vs mpi) takes effect — confirm against PetscTryMethod usage */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicate_MPIDense - Creates a new matrix with the same type, layout, and
  options as A; `cpvalues` controls whether the numerical values are copied
  (delegated to the inner sequential matrix's MatDuplicate()).
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  /* duplicate is born assembled/preallocated; no further setup needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) A's row/column layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a parallel dense matrix from a viewer; binary is always supported,
   HDF5 only when PETSc was configured with it. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

2036: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2037: {
2038:   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2039:   Mat           a, b;

2041:   PetscFunctionBegin;
2042:   a = matA->A;
2043:   b = matB->A;
2044:   PetscCall(MatEqual(a, b, flag));
2045:   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2046:   PetscFunctionReturn(PETSC_SUCCESS);
2047: }

2049: static PetscErrorCode MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)
2050: {
2051:   MatProductCtx_TransMatMultDense *atb = *(MatProductCtx_TransMatMultDense **)data;

2053:   PetscFunctionBegin;
2054:   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2055:   PetscCall(MatDestroy(&atb->atb));
2056:   PetscCall(PetscFree(atb));
2057:   PetscFunctionReturn(PETSC_SUCCESS);
2058: }

2060: static PetscErrorCode MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)
2061: {
2062:   MatProductCtx_MatTransMultDense *abt = *(MatProductCtx_MatTransMultDense **)data;

2064:   PetscFunctionBegin;
2065:   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2066:   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2067:   PetscCall(PetscFree(abt));
2068:   PetscFunctionReturn(PETSC_SUCCESS);
2069: }

/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank first forms the local product aseq^T * bseq (a full cM x cN
   matrix), then the per-rank contributions are summed and distributed
   according to C's row ownership — either via a PetscSF reduce (when all
   rows of C live on rank 0) or via MPI_Reduce_scatter (general case). */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_TransMatMultDense *atb;
  MPI_Comm                         comm;
  PetscMPIInt                      size, *recvcounts;
  PetscScalar                     *carray, *sendbuf;
  const PetscScalar               *atbarray;
  PetscInt                         i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were sized in the symbolic phase and live on the product context */
  atb        = (MatProductCtx_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      /* only rank 0 receives data, so only its destination block needs fixing up */
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* reduce whole columns at a time by describing one column as one MPI datatype */
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    /* *AndMemType accessors allow GPU-aware MPI to operate on device memory directly */
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: for each destination rank, pack the rows of
     the local A^T*B that the rank owns, column-major within the destination block */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

2176: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2177: {
2178:   MPI_Comm                         comm;
2179:   PetscMPIInt                      size;
2180:   PetscInt                         cm = A->cmap->n, cM, cN = B->cmap->N;
2181:   MatProductCtx_TransMatMultDense *atb;
2182:   PetscBool                        cisdense = PETSC_FALSE;
2183:   const PetscInt                  *ranges;

2185:   PetscFunctionBegin;
2186:   MatCheckProduct(C, 4);
2187:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2188:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2189:   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
2190:              A->rmap->rend, B->rmap->rstart, B->rmap->rend);

2192:   /* create matrix product C */
2193:   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2194: #if defined(PETSC_HAVE_CUDA)
2195:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2196: #endif
2197: #if defined(PETSC_HAVE_HIP)
2198:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2199: #endif
2200:   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2201:   PetscCall(MatSetUp(C));

2203:   /* create data structure for reuse C */
2204:   PetscCallMPI(MPI_Comm_size(comm, &size));
2205:   PetscCall(PetscNew(&atb));
2206:   cM = C->rmap->N;
2207:   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2208:   PetscCall(MatGetOwnershipRanges(C, &ranges));
2209:   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2210:   C->product->data    = atb;
2211:   C->product->destroy = MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense;
2212:   PetscFunctionReturn(PETSC_SUCCESS);
2213: }

2215: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2216: {
2217:   MPI_Comm                         comm;
2218:   PetscMPIInt                      i, size;
2219:   PetscInt                         maxRows, bufsiz;
2220:   PetscMPIInt                      tag;
2221:   PetscInt                         alg;
2222:   MatProductCtx_MatTransMultDense *abt;
2223:   Mat_Product                     *product = C->product;
2224:   PetscBool                        flg;

2226:   PetscFunctionBegin;
2227:   MatCheckProduct(C, 4);
2228:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2229:   /* check local size of A and B */
2230:   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

2232:   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2233:   alg = flg ? 0 : 1;

2235:   /* setup matrix product C */
2236:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2237:   PetscCall(MatSetType(C, MATMPIDENSE));
2238:   PetscCall(MatSetUp(C));
2239:   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

2241:   /* create data structure for reuse C */
2242:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2243:   PetscCallMPI(MPI_Comm_size(comm, &size));
2244:   PetscCall(PetscNew(&abt));
2245:   abt->tag = tag;
2246:   abt->alg = alg;
2247:   switch (alg) {
2248:   case 1: /* alg: "cyclic" */
2249:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2250:     bufsiz = A->cmap->N * maxRows;
2251:     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2252:     break;
2253:   default: /* alg: "allgatherv" */
2254:     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2255:     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2256:     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2257:     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2258:     break;
2259:   }

2261:   C->product->data                = abt;
2262:   C->product->destroy             = MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense;
2263:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2264:   PetscFunctionReturn(PETSC_SUCCESS);
2265: }

/* Numeric phase of C = A * B^T using the "cyclic" algorithm: each rank's
   (transposed) block of B travels around a ring of processes; at every step
   each rank multiplies its local A against the block currently in hand and
   writes the result into the matching column slab of C. Communication for
   the next step is overlapped with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_MatTransMultDense *abt;
  MPI_Comm                         comm;
  PetscMPIInt                      rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar                     *sendbuf, *recvbuf = NULL, *cv;
  PetscInt                         i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar                      _DOne = 1.0, _DZero = 0.0;
  const PetscScalar               *av, *bv;
  PetscBLASInt                     cm, cn, ck, alda, blda = 0, clda;
  MPI_Request                      reqs[2];
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (MatProductCtx_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local blocks, cast for BLAS */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if B's local block is contiguous (lda == local rows) it can be sent in place;
     otherwise compact it into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring topology: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column slab of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

2351: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2352: {
2353:   Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2354:   MatProductCtx_MatTransMultDense *abt;
2355:   MPI_Comm                         comm;
2356:   PetscMPIInt                      size, ibn;
2357:   PetscScalar                     *cv, *sendbuf, *recvbuf;
2358:   const PetscScalar               *av, *bv;
2359:   PetscInt                         blda, i, cK = A->cmap->N, k, j, bn;
2360:   PetscScalar                      _DOne = 1.0, _DZero = 0.0;
2361:   PetscBLASInt                     cm, cn, ck, alda, clda;

2363:   PetscFunctionBegin;
2364:   MatCheckProduct(C, 3);
2365:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2366:   abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2367:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2368:   PetscCallMPI(MPI_Comm_size(comm, &size));
2369:   PetscCall(MatDenseGetArrayRead(a->A, &av));
2370:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2371:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2372:   PetscCall(MatDenseGetLDA(a->A, &i));
2373:   PetscCall(PetscBLASIntCast(i, &alda));
2374:   PetscCall(MatDenseGetLDA(b->A, &blda));
2375:   PetscCall(MatDenseGetLDA(c->A, &i));
2376:   PetscCall(PetscBLASIntCast(i, &clda));
2377:   /* copy transpose of B into buf[0] */
2378:   bn      = B->rmap->n;
2379:   sendbuf = abt->buf[0];
2380:   recvbuf = abt->buf[1];
2381:   for (k = 0, j = 0; j < bn; j++) {
2382:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2383:   }
2384:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2385:   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2386:   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2387:   PetscCall(PetscBLASIntCast(cK, &ck));
2388:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2389:   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2390:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2391:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2392:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2393:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2394:   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2395:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2396:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2397:   PetscFunctionReturn(PETSC_SUCCESS);
2398: }

2400: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2401: {
2402:   MatProductCtx_MatTransMultDense *abt;

2404:   PetscFunctionBegin;
2405:   MatCheckProduct(C, 3);
2406:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2407:   abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2408:   switch (abt->alg) {
2409:   case 1:
2410:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2411:     break;
2412:   default:
2413:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2414:     break;
2415:   }
2416:   PetscFunctionReturn(PETSC_SUCCESS);
2417: }

2419: static PetscErrorCode MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)
2420: {
2421:   MatProductCtx_MatMultDense *ab = *(MatProductCtx_MatMultDense **)data;

2423:   PetscFunctionBegin;
2424:   PetscCall(MatDestroy(&ab->Ce));
2425:   PetscCall(MatDestroy(&ab->Ae));
2426:   PetscCall(MatDestroy(&ab->Be));
2427:   PetscCall(PetscFree(ab));
2428:   PetscFunctionReturn(PETSC_SUCCESS);
2429: }

/* Numeric phase of C = A * B for MPI dense matrices.
   If Elemental mirrors were created at symbolic time, delegate to Elemental.
   Otherwise gather all rows of B onto every rank (into the sequential Be) —
   using a single GPU-aware SF broadcast when B lives entirely on rank 0 —
   and perform one local sequential dense multiply. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  MatProductCtx_MatMultDense *ab;
  Mat_MPIDense               *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense               *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (MatProductCtx_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
    /* "elemental" algorithm: refresh the mirrors and multiply there */
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL;
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL;
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* both source and destination must be contiguous (lda == nrows) for the
         single strided broadcast; fall back to temporary copies otherwise */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        /* only rank 0 owns data to broadcast, so only its source needs fixing up */
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* broadcast whole rows at a time: one MPI datatype = one row of B (N columns) */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      /* *AndMemType accessors allow GPU-aware MPI to work on device buffers directly */
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B column by column through the SF */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local rows of C = local A times the fully-gathered B */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

2524: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2525: {
2526:   Mat_Product                *product = C->product;
2527:   PetscInt                    alg;
2528:   MatProductCtx_MatMultDense *ab;
2529:   PetscBool                   flg;

2531:   PetscFunctionBegin;
2532:   MatCheckProduct(C, 4);
2533:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2534:   /* check local size of A and B */
2535:   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2536:              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

2538:   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2539:   alg = flg ? 0 : 1;

2541:   /* setup C */
2542:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2543:   PetscCall(MatSetType(C, MATMPIDENSE));
2544:   PetscCall(MatSetUp(C));

2546:   /* create data structure for reuse Cdense */
2547:   PetscCall(PetscNew(&ab));

2549:   switch (alg) {
2550:   case 1: /* alg: "elemental" */
2551: #if PetscDefined(HAVE_ELEMENTAL)
2552:     /* create elemental matrices Ae and Be */
2553:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2554:     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2555:     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2556:     PetscCall(MatSetUp(ab->Ae));
2557:     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

2559:     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2560:     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2561:     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2562:     PetscCall(MatSetUp(ab->Be));
2563:     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

2565:     /* compute symbolic Ce = Ae*Be */
2566:     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2567:     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2568: #else
2569:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2570: #endif
2571:     break;
2572:   default: /* alg: "petsc" */
2573:     ab->Ae = NULL;
2574:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2575:     ab->Ce = NULL;
2576:     break;
2577:   }

2579:   C->product->data       = ab;
2580:   C->product->destroy    = MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense;
2581:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2582:   PetscFunctionReturn(PETSC_SUCCESS);
2583: }

/* Configure the A*B product for MPI dense matrices: pick the algorithm
   ("petsc" by default; "elemental" only when PETSc was built with it) and
   install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is PETSc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the A^T*B product for MPI dense matrices: validate that A and B
   share row ownership and install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the A*B^T product for MPI dense matrices: pick the algorithm
   ("allgatherv" by default, "cyclic" as the alternative) from either the
   legacy MatMatTransposeMult option or the generic MatProduct option,
   then install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

2652: static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2653: {
2654:   Mat_Product *product = C->product;

2656:   PetscFunctionBegin;
2657:   switch (product->type) {
2658:   case MATPRODUCT_AB:
2659:     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2660:     break;
2661:   case MATPRODUCT_AtB:
2662:     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2663:     break;
2664:   case MATPRODUCT_ABt:
2665:     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2666:     break;
2667:   default:
2668:     break;
2669:   }
2670:   PetscFunctionReturn(PETSC_SUCCESS);
2671: }

/*
  MatDenseScatter_Private - Communicate the entries of the dense matrix X into the
  dense matrix Y, column by column, following the row communication pattern of the
  star forest sf.

  For SCATTER_FORWARD the values are broadcast from the roots (X) to the leaves (Y);
  for SCATTER_REVERSE they are reduced from the leaves (X) into the roots (Y).
  The InsertMode selects the MPI operation applied at the destination
  (INSERT_VALUES -> MPI_REPLACE, ADD/MAX/MIN_VALUES -> MPIU_SUM/MAX/MIN).

  When the matrices have more than one column, a "strided" variant of sf covering
  all N columns (built from the leading dimensions of X and Y) is created on demand
  and cached on sf via PetscObjectCompose().

  Both matrix arrays are accessed with their memory type (host/device) so the SF can
  use GPU-aware communication when available.
*/
PetscErrorCode MatDenseScatter_Private(PetscSF sf, Mat X, Mat Y, InsertMode mode, ScatterMode smode)
{
  const PetscScalar *in;
  PetscScalar       *out;
  PetscSF            vsf;                /* SF actually used: sf itself (N == 1) or the cached strided variant */
  PetscInt           N, ny, rld, lld;    /* global column counts of X and Y; leading dimensions of X and Y */
  PetscMemType       mtype[2];           /* memory types of the X (read) and Y (write) arrays */
  MPI_Op             op = MPI_OP_NULL;

  PetscFunctionBegin;
  /* Map the InsertMode onto the MPI operation applied at the destination */
  if (mode == INSERT_VALUES) op = MPI_REPLACE;
  else if (mode == ADD_VALUES) op = MPIU_SUM;
  else if (mode == MAX_VALUES) op = MPIU_MAX;
  else if (mode == MIN_VALUES) op = MPIU_MIN;
  PetscCheck(op != MPI_OP_NULL, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in MatDenseScatter_Private()", mode);
  PetscCheck(smode == SCATTER_FORWARD || smode == SCATTER_REVERSE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported ScatterMode %d in MatDenseScatter_Private()", smode);
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatGetSize(Y, NULL, &ny));
  PetscCheck(N == ny, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_SIZ, "Matrix column sizes must match: %" PetscInt_FMT " != %" PetscInt_FMT, N, ny);
  PetscCall(MatDenseGetLDA(X, &rld));
  PetscCall(MatDenseGetLDA(Y, &lld));
  /* get cached or create new strided PetscSF when the number of columns is greater than one */
  if (N > 1) {
    PetscCall(PetscObjectQuery((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject *)&vsf));
    if (vsf) {
      PetscInt nr[2], nl[2];

      /* Validate the cached SF: its root/leaf counts must be exactly N times those
         of the base SF, otherwise the number of columns has changed and the cached
         SF must be rebuilt.
         NOTE(review): this check does not detect a change in the leading dimensions
         of X or Y while N and the graph sizes stay the same; the cached strided SF
         could then be stale — verify callers keep the ldas fixed across calls. */
      PetscCall(PetscSFGetGraph(sf, nr, nl, NULL, NULL));
      PetscCall(PetscSFGetGraph(vsf, nr + 1, nl + 1, NULL, NULL));
      if (N * nr[0] != nr[1] || N * nl[0] != nl[1]) vsf = NULL;
    }
    if (!vsf) {
      /* Build a strided SF spanning all N columns using the two leading dimensions,
         cache it on sf, and drop our reference so sf owns it */
      PetscCall(PetscSFCreateStridedSF(sf, N, rld, lld, &vsf));
      PetscCall(PetscObjectCompose((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject)vsf));
      PetscCall(PetscObjectDereference((PetscObject)vsf));
    }
  } else vsf = sf;
  /* the output array is accessed in read and write mode,
    but write-only in the INSERT_VALUES case could be worth exploring */
  PetscCall(MatDenseGetArrayReadAndMemType(X, &in, &mtype[0]));
  PetscCall(MatDenseGetArrayAndMemType(Y, &out, &mtype[1]));
  /* vsf->vscat.unit is the MPI datatype cached on the SF by the VecScatter
     machinery — presumably one PetscScalar; TODO confirm it is always set here */
  if (smode == SCATTER_FORWARD) {
    /* forward: broadcast root values (X) into leaves (Y), combining with op */
    PetscCall(PetscSFBcastWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFBcastEnd(vsf, vsf->vscat.unit, in, out, op));
  } else {
    /* reverse: reduce leaf values (X) into roots (Y), combining with op */
    PetscCall(PetscSFReduceWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFReduceEnd(vsf, vsf->vscat.unit, in, out, op));
  }
  PetscCall(MatDenseRestoreArrayAndMemType(Y, &out));
  PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
  PetscFunctionReturn(PETSC_SUCCESS);
}