/* mpidense.c */


  2: /*
  3:    Basic functions for basic parallel dense matrices.
  4:    Portions of this code are under:
  5:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  6: */

  8: #include <../src/mat/impls/dense/mpi/mpidense.h>
  9: #include <../src/mat/impls/aij/mpi/mpiaij.h>
 10: #include <petscblaslapack.h>

 12: /*@
 13:       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
 14:               matrix that represents the operator. For sequential matrices it returns itself.

 16:     Input Parameter:
 17: .      A - the sequential or MPI dense matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 25: @*/
 26: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 27: {
 28:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 29:   PetscBool     flg;

 33:   PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg);
 34:   if (flg) *B = mat->A;
 35:   else {
 36:     PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg);
 38:     *B = A;
 39:   }
 40:   return 0;
 41: }

 43: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 44: {
 45:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 46:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 48:   MatCopy(Amat->A, Bmat->A, s);
 49:   return 0;
 50: }

 52: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 53: {
 54:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 55:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 56:   PetscScalar  *v;

 58:   MatDenseGetArray(mat->A, &v);
 59:   MatDenseGetLDA(mat->A, &lda);
 60:   rend2 = PetscMin(rend, A->cmap->N);
 61:   if (rend2 > rstart) {
 62:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 63:     PetscLogFlops(rend2 - rstart);
 64:   }
 65:   MatDenseRestoreArray(mat->A, &v);
 66:   return 0;
 67: }

 69: PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 70: {
 71:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 72:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 75:   lrow = row - rstart;
 76:   MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
 77:   return 0;
 78: }

 80: PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 81: {
 82:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 83:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 86:   lrow = row - rstart;
 87:   MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
 88:   return 0;
 89: }

 91: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
 92: {
 93:   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
 94:   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
 95:   PetscScalar  *array;
 96:   MPI_Comm      comm;
 97:   PetscBool     flg;
 98:   Mat           B;

100:   MatHasCongruentLayouts(A, &flg);
102:   PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B);
103:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
104: #if defined(PETSC_HAVE_CUDA)
105:     PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg);
107: #elif (PETSC_HAVE_HIP)
108:     PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg);
110: #endif
111:     PetscObjectGetComm((PetscObject)(mdn->A), &comm);
112:     MatCreate(comm, &B);
113:     MatSetSizes(B, m, m, m, m);
114:     MatSetType(B, ((PetscObject)mdn->A)->type_name);
115:     MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array);
116:     MatSeqDenseSetPreallocation(B, array + m * rstart);
117:     MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array);
118:     PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B);
119:     *a = B;
120:     MatDestroy(&B);
121:   } else *a = B;
122:   return 0;
123: }

/*
   Insert/add a logically dense block of values. Locally owned rows go straight
   into the local matrix; off-process rows are stashed for communication during
   assembly (unless stashing is disabled). Negative row/column indices are
   silently skipped. v is interpreted row-major when A->roworiented, otherwise
   column-major with leading dimension m.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index: skip */
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: set directly in the inner sequential matrix */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row i of v is contiguous: hand the whole row over in one call */
        MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv);
      } else {
        /* column-major v: entry (i,j) lives at v[i + j*m], one call per entry */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column index: skip */
          MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: queue in the stash; mark unassembled so assembly runs */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE);
      } else {
        MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE);
      }
    }
  }
  return 0;
}

157: PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
158: {
159:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
160:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

162:   for (i = 0; i < m; i++) {
163:     if (idxm[i] < 0) continue; /* negative row */
165:     if (idxm[i] >= rstart && idxm[i] < rend) {
166:       row = idxm[i] - rstart;
167:       for (j = 0; j < n; j++) {
168:         if (idxn[j] < 0) continue; /* negative column */
170:         MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j);
171:       }
172:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
173:   }
174:   return 0;
175: }

177: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
178: {
179:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

181:   MatDenseGetLDA(a->A, lda);
182:   return 0;
183: }

185: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
186: {
187:   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
188:   MatType       mtype = MATSEQDENSE;

190:   if (!a->A) {
192:     PetscLayoutSetUp(A->rmap);
193:     PetscLayoutSetUp(A->cmap);
194:     MatCreate(PETSC_COMM_SELF, &a->A);
195:     MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
196: #if defined(PETSC_HAVE_CUDA)
197:     PetscBool iscuda;
198:     PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
199:     if (iscuda) mtype = MATSEQDENSECUDA;
200: #elif (PETSC_HAVE_HIP)
201:     PetscBool iship;
202:     PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship);
203:     if (iship) mtype = MATSEQDENSEHIP;
204: #endif
205:     MatSetType(a->A, mtype);
206:   }
207:   MatDenseSetLDA(a->A, lda);
208:   return 0;
209: }

211: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
212: {
213:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

216:   MatDenseGetArray(a->A, array);
217:   return 0;
218: }

220: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
221: {
222:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

225:   MatDenseGetArrayRead(a->A, array);
226:   return 0;
227: }

229: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
230: {
231:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

234:   MatDenseGetArrayWrite(a->A, array);
235:   return 0;
236: }

238: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
239: {
240:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

244:   MatDensePlaceArray(a->A, array);
245:   return 0;
246: }

248: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
249: {
250:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

254:   MatDenseResetArray(a->A);
255:   return 0;
256: }

258: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
259: {
260:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

264:   MatDenseReplaceArray(a->A, array);
265:   return 0;
266: }

/*
   Extract the submatrix A(isrow, iscol) as a new (or reused) MATMPIDENSE.
   isrow must select locally owned rows only (no parallel redistribution);
   iscol is gathered onto every process so each process owns all selected
   columns for its rows.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscObjectGetComm((PetscObject)A, &comm_mat);
  PetscObjectGetComm((PetscObject)iscol, &comm_is);

  /* gather the full column index set on every process */
  ISAllGather(iscol, &iscol_local);
  ISGetIndices(isrow, &irow);
  ISGetIndices(iscol_local, &icol);
  ISGetLocalSize(isrow, &nrows);
  ISGetLocalSize(iscol, &ncols);
  ISGetSize(iscol, &Ncols); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  MatGetLocalSize(A, &nlrows, &nlcols);
  MatGetOwnershipRange(A, &rstart, &rend);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    MatCreate(PetscObjectComm((PetscObject)A), &newmat);
    MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols);
    MatSetType(newmat, ((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(newmat, NULL);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  MatDenseGetArray(newmatd->A, &bv);
  MatDenseGetArrayRead(mat->A, &v);
  MatDenseGetLDA(mat->A, &lda);
  /* bv is filled contiguously column-by-column; irow holds global row indices,
     hence the -rstart shift into the local array */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  MatDenseRestoreArrayRead(mat->A, &v);
  MatDenseRestoreArray(newmatd->A, &bv);

  /* Assemble the matrices so that the correct flags are set */
  MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);

  /* Free work space */
  ISRestoreIndices(isrow, &irow);
  ISRestoreIndices(iscol_local, &icol);
  ISDestroy(&iscol_local);
  *B = newmat;
  return 0;
}

334: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
335: {
336:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

338:   MatDenseRestoreArray(a->A, array);
339:   return 0;
340: }

342: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
343: {
344:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

346:   MatDenseRestoreArrayRead(a->A, array);
347:   return 0;
348: }

350: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
351: {
352:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

354:   MatDenseRestoreArrayWrite(a->A, array);
355:   return 0;
356: }

358: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
359: {
360:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
361:   PetscInt      nstash, reallocs;

363:   if (mdn->donotstash || mat->nooffprocentries) return 0;

365:   MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
366:   MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
367:   PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
368:   return 0;
369: }

/*
   Finish assembly: drain the stash messages started by MatAssemblyBegin,
   inserting the received (row, col, val) triples locally, then assemble the
   inner sequential matrix.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
        i = j; /* continue with the first entry of the next row */
      }
    }
    MatStashScatterEnd_Private(&mat->stash);
  }

  /* assemble the local matrix regardless of whether stashing took place */
  MatAssemblyBegin(mdn->A, mode);
  MatAssemblyEnd(mdn->A, mode);
  return 0;
}

404: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
405: {
406:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

408:   MatZeroEntries(l->A);
409:   return 0;
410: }

412: PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
413: {
414:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
415:   PetscInt      i, len, *lrows;

417:   /* get locally owned rows */
418:   PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL);
419:   /* fix right hand side if needed */
420:   if (x && b) {
421:     const PetscScalar *xx;
422:     PetscScalar       *bb;

424:     VecGetArrayRead(x, &xx);
425:     VecGetArrayWrite(b, &bb);
426:     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
427:     VecRestoreArrayRead(x, &xx);
428:     VecRestoreArrayWrite(b, &bb);
429:   }
430:   MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL);
431:   if (diag != 0.0) {
432:     Vec d;

434:     MatCreateVecs(A, NULL, &d);
435:     VecSet(d, diag);
436:     MatDiagonalSet(A, d, INSERT_VALUES);
437:     VecDestroy(&d);
438:   }
439:   PetscFree(lrows);
440:   return 0;
441: }

443: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
444: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
445: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
446: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

448: PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
449: {
450:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
451:   const PetscScalar *ax;
452:   PetscScalar       *ay;
453:   PetscMemType       axmtype, aymtype;

455:   if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat);
456:   VecGetArrayReadAndMemType(xx, &ax, &axmtype);
457:   VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
458:   PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
459:   PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
460:   VecRestoreArrayAndMemType(mdn->lvec, &ay);
461:   VecRestoreArrayReadAndMemType(xx, &ax);
462:   (*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy);
463:   return 0;
464: }

466: PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
467: {
468:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
469:   const PetscScalar *ax;
470:   PetscScalar       *ay;
471:   PetscMemType       axmtype, aymtype;

473:   if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat);
474:   VecGetArrayReadAndMemType(xx, &ax, &axmtype);
475:   VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
476:   PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
477:   PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
478:   VecRestoreArrayAndMemType(mdn->lvec, &ay);
479:   VecRestoreArrayReadAndMemType(xx, &ax);
480:   (*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz);
481:   return 0;
482: }

484: PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
485: {
486:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
487:   const PetscScalar *ax;
488:   PetscScalar       *ay;
489:   PetscMemType       axmtype, aymtype;

491:   if (!a->Mvctx) MatSetUpMultiply_MPIDense(A);
492:   VecSet(yy, 0.0);
493:   (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
494:   VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
495:   VecGetArrayAndMemType(yy, &ay, &aymtype);
496:   PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
497:   PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
498:   VecRestoreArrayReadAndMemType(a->lvec, &ax);
499:   VecRestoreArrayAndMemType(yy, &ay);
500:   return 0;
501: }

503: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
504: {
505:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
506:   const PetscScalar *ax;
507:   PetscScalar       *ay;
508:   PetscMemType       axmtype, aymtype;

510:   if (!a->Mvctx) MatSetUpMultiply_MPIDense(A);
511:   VecCopy(yy, zz);
512:   (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
513:   VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
514:   VecGetArrayAndMemType(zz, &ay, &aymtype);
515:   PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
516:   PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
517:   VecRestoreArrayReadAndMemType(a->lvec, &ax);
518:   VecRestoreArrayAndMemType(zz, &ay);
519:   return 0;
520: }

522: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
523: {
524:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
525:   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
526:   PetscScalar       *x, zero = 0.0;
527:   const PetscScalar *av;

529:   VecSet(v, zero);
530:   VecGetArray(v, &x);
531:   VecGetSize(v, &n);
533:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
534:   radd = A->rmap->rstart * m;
535:   MatDenseGetArrayRead(a->A, &av);
536:   MatDenseGetLDA(a->A, &lda);
537:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
538:   MatDenseRestoreArrayRead(a->A, &av);
539:   VecRestoreArray(v, &x);
540:   return 0;
541: }

/*
   Destroy the parallel dense matrix: free the stash, inner matrix, work
   vectors, and SF, then clear every composed method and the cached diagonal
   block so no dangling function pointers survive the type change.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  MatStashDestroy_Private(&mat->stash);
  /* release the inner sequential matrix and communication machinery */
  MatDestroy(&mdn->A);
  VecDestroy(&mdn->lvec);
  PetscSFDestroy(&mdn->Mvctx);
  VecDestroy(&mdn->cvec);
  MatDestroy(&mdn->cmat);

  PetscFree(mat->data);
  PetscObjectChangeTypeName((PetscObject)mat, NULL);

  /* detach every composed method installed at creation/convert time */
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL);
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL);
#if defined(PETSC_HAVE_CUDA)
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL);
#endif
#if defined(PETSC_HAVE_HIP)
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL);
  return 0;
}

637: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

639: #include <petscdraw.h>
/*
   View handler for ASCII, draw, and socket viewers. Info formats are handled
   per-process; otherwise the whole matrix is assembled on rank 0 and viewed
   there as a sequential dense matrix.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype; /* NOTE(review): fetched below but never used -- candidate for removal */
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
  if (iascii) {
    PetscViewerGetType(viewer, &vtype);
    PetscViewerGetFormat(viewer, &format);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary only; no global gather needed */
      MatInfo info;
      MatGetInfo(mat, MAT_LOCAL, &info);
      PetscViewerASCIIPushSynchronized(viewer);
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPopSynchronized(viewer);
      if (mdn->Mvctx) PetscSFView(mdn->Mvctx, viewer);
      return 0;
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      return 0;
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscViewerDrawGetDraw(viewer, 0, &draw);
    PetscDrawIsNull(draw, &isnull);
    if (isnull) return 0;
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    MatCreate(PetscObjectComm((PetscObject)mat), &A);
    /* rank 0 owns everything; the other ranks hold empty local blocks */
    if (rank == 0) {
      MatSetSizes(A, M, N, M, N);
    } else {
      MatSetSizes(A, 0, 0, M, N);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    MatSetType(A, MATMPIDENSE);
    MatMPIDenseSetPreallocation(A, NULL);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      MatGetRow_MPIDense(mat, row, &nz, &cols, &vals);
      MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES);
      MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals);
      row++;
    }

    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    /* only rank 0 views its (complete) local block through the sub-viewer */
    PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    if (rank == 0) {
      PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name);
      MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer);
    }
    PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    PetscViewerFlush(viewer);
    MatDestroy(&A);
  }
  return 0;
}

721: PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
722: {
723:   PetscBool iascii, isbinary, isdraw, issocket;

725:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
726:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
727:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
728:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);

730:   if (iascii || issocket || isdraw) {
731:     MatView_MPIDense_ASCIIorDraworSocket(mat, viewer);
732:   } else if (isbinary) MatView_Dense_Binary(mat, viewer);
733:   return 0;
734: }

/*
   Gather matrix statistics. The local info comes from the inner sequential
   matrix; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five additive fields are
   reduced across the communicator with MPI_MAX / MPI_SUM respectively.
*/
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  info->block_size = 1.0;

  MatGetInfo(mdn, MAT_LOCAL, info);

  /* pack the five reducible fields for a single allreduce */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/*
   Set a matrix option. Options relevant to the local storage are forwarded to
   the inner sequential matrix; sparsity/symmetry hints that do not apply to a
   dense matrix are logged and ignored; unknown options are an error.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  switch (op) {
  /* forwarded to the inner sequential matrix */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    MatSetOption(a->A, op, flg);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* recorded locally (affects MatSetValues layout) and forwarded */
    a->roworiented = flg;
    MatSetOption(a->A, op, flg);
    break;
  /* meaningless for a dense matrix: note and ignore */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* disables the assembly stash */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  return 0;
}

823: PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
824: {
825:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
826:   const PetscScalar *l;
827:   PetscScalar        x, *v, *vv, *r;
828:   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

830:   MatDenseGetArray(mdn->A, &vv);
831:   MatDenseGetLDA(mdn->A, &lda);
832:   MatGetLocalSize(A, &s2, &s3);
833:   if (ll) {
834:     VecGetLocalSize(ll, &s2a);
836:     VecGetArrayRead(ll, &l);
837:     for (i = 0; i < m; i++) {
838:       x = l[i];
839:       v = vv + i;
840:       for (j = 0; j < n; j++) {
841:         (*v) *= x;
842:         v += lda;
843:       }
844:     }
845:     VecRestoreArrayRead(ll, &l);
846:     PetscLogFlops(1.0 * n * m);
847:   }
848:   if (rr) {
849:     const PetscScalar *ar;

851:     VecGetLocalSize(rr, &s3a);
853:     VecGetArrayRead(rr, &ar);
854:     if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(A);
855:     VecGetArray(mdn->lvec, &r);
856:     PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
857:     PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
858:     VecRestoreArrayRead(rr, &ar);
859:     for (i = 0; i < n; i++) {
860:       x = r[i];
861:       v = vv + i * lda;
862:       for (j = 0; j < m; j++) (*v++) *= x;
863:     }
864:     VecRestoreArray(mdn->lvec, &r);
865:     PetscLogFlops(1.0 * n * m);
866:   }
867:   MatDenseRestoreArray(mdn->A, &vv);
868:   return 0;
869: }

871: PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
872: {
873:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
874:   PetscInt           i, j;
875:   PetscMPIInt        size;
876:   PetscReal          sum = 0.0;
877:   const PetscScalar *av, *v;

879:   MatDenseGetArrayRead(mdn->A, &av);
880:   v = av;
881:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
882:   if (size == 1) {
883:     MatNorm(mdn->A, type, nrm);
884:   } else {
885:     if (type == NORM_FROBENIUS) {
886:       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
887:         sum += PetscRealPart(PetscConj(*v) * (*v));
888:         v++;
889:       }
890:       MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
891:       *nrm = PetscSqrtReal(*nrm);
892:       PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n);
893:     } else if (type == NORM_1) {
894:       PetscReal *tmp, *tmp2;
895:       PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2);
896:       *nrm = 0.0;
897:       v    = av;
898:       for (j = 0; j < mdn->A->cmap->n; j++) {
899:         for (i = 0; i < mdn->A->rmap->n; i++) {
900:           tmp[j] += PetscAbsScalar(*v);
901:           v++;
902:         }
903:       }
904:       MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
905:       for (j = 0; j < A->cmap->N; j++) {
906:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
907:       }
908:       PetscFree2(tmp, tmp2);
909:       PetscLogFlops(A->cmap->n * A->rmap->n);
910:     } else if (type == NORM_INFINITY) { /* max row norm */
911:       PetscReal ntemp;
912:       MatNorm(mdn->A, type, &ntemp);
913:       MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A));
914:     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
915:   }
916:   MatDenseRestoreArrayRead(mdn->A, &av);
917:   return 0;
918: }

/*
  MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

  The local block a->A is rmap->n x cmap->N (each rank stores complete rows),
  so column j of the local array holds the entries of global row j of the
  transpose restricted to columns [rstart, rend); MatSetValues()/MatAssembly
  route them to their owning ranks.  Supports MAT_INITIAL_MATRIX,
  MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* even in-place a fresh matrix is built; it replaces A's innards at the end */
    MatCreate(PetscObjectComm((PetscObject)A), &B);
    MatSetSizes(B, A->cmap->n, A->rmap->n, N, M);
    MatSetType(B, ((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(B, NULL);
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  MatDenseGetArrayRead(a->A, (const PetscScalar **)&v);
  MatDenseGetLDA(a->A, &lda);
  PetscMalloc1(m, &rwork);
  /* rwork[] = global column indices of B corresponding to this rank's rows of A */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local array provides this rank's contribution to row j of B */
    MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES);
    v += lda;
  }
  MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v);
  PetscFree(rwork);
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's content into A and destroy the shell */
    MatHeaderMerge(A, &B);
  }
  return 0;
}

958: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
959: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Default MatSetUp(): finalize the row/column layouts and, if the user has not
   done so, perform the default (CPU, self-allocated) preallocation. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);
  if (!A->preallocated) MatMPIDenseSetPreallocation(A, NULL);
  return 0;
}

969: PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
970: {
971:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

973:   MatAXPY(A->A, alpha, B->A, str);
974:   return 0;
975: }

977: PetscErrorCode MatConjugate_MPIDense(Mat mat)
978: {
979:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

981:   MatConjugate(a->A);
982:   return 0;
983: }

985: PetscErrorCode MatRealPart_MPIDense(Mat A)
986: {
987:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

989:   MatRealPart(a->A);
990:   return 0;
991: }

993: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
994: {
995:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

997:   MatImaginaryPart(a->A);
998:   return 0;
999: }

1001: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1002: {
1003:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1007:   (*a->A->ops->getcolumnvector)(a->A, v, col);
1008:   return 0;
1009: }

1011: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

1013: PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1014: {
1015:   PetscInt      i, m, n;
1016:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1017:   PetscReal    *work;

1019:   MatGetSize(A, &m, &n);
1020:   PetscMalloc1(n, &work);
1021:   if (type == REDUCTION_MEAN_REALPART) {
1022:     MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work);
1023:   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1024:     MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work);
1025:   } else {
1026:     MatGetColumnReductions_SeqDense(a->A, type, work);
1027:   }
1028:   if (type == NORM_2) {
1029:     for (i = 0; i < n; i++) work[i] *= work[i];
1030:   }
1031:   if (type == NORM_INFINITY) {
1032:     MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm);
1033:   } else {
1034:     MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm);
1035:   }
1036:   PetscFree(work);
1037:   if (type == NORM_2) {
1038:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1039:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1040:     for (i = 0; i < n; i++) reductions[i] /= m;
1041:   }
1042:   return 0;
1043: }

1045: #if defined(PETSC_HAVE_CUDA)
1046: PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
1047: {
1048:   PetscScalar *da;
1049:   PetscInt     lda;

1051:   MatDenseCUDAGetArray(A, &da);
1052:   MatDenseGetLDA(A, &lda);
1053:   PetscInfo(A, "Performing Shift on backend\n");
1054:   MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N);
1055:   MatDenseCUDARestoreArray(A, &da);
1056:   return 0;
1057: }

1059: static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1060: {
1061:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1062:   PetscInt      lda;

1066:   if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1067:   a->vecinuse = col + 1;
1068:   MatDenseGetLDA(a->A, &lda);
1069:   MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse);
1070:   VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1071:   *v = a->cvec;
1072:   return 0;
1073: }

1075: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1076: {
1077:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1081:   a->vecinuse = 0;
1082:   MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
1083:   VecCUDAResetArray(a->cvec);
1084:   if (v) *v = NULL;
1085:   return 0;
1086: }

1088: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1089: {
1090:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1091:   PetscInt      lda;

1095:   if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1096:   a->vecinuse = col + 1;
1097:   MatDenseGetLDA(a->A, &lda);
1098:   MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse);
1099:   VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1100:   VecLockReadPush(a->cvec);
1101:   *v = a->cvec;
1102:   return 0;
1103: }

1105: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1106: {
1107:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1111:   a->vecinuse = 0;
1112:   MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse);
1113:   VecLockReadPop(a->cvec);
1114:   VecCUDAResetArray(a->cvec);
1115:   if (v) *v = NULL;
1116:   return 0;
1117: }

1119: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1120: {
1121:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1122:   PetscInt      lda;

1126:   if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1127:   a->vecinuse = col + 1;
1128:   MatDenseGetLDA(a->A, &lda);
1129:   MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1130:   VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1131:   *v = a->cvec;
1132:   return 0;
1133: }

1135: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1136: {
1137:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1141:   a->vecinuse = 0;
1142:   MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1143:   VecCUDAResetArray(a->cvec);
1144:   if (v) *v = NULL;
1145:   return 0;
1146: }

1148: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1149: {
1150:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1154:   MatDenseCUDAPlaceArray(l->A, a);
1155:   return 0;
1156: }

1158: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1159: {
1160:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1164:   MatDenseCUDAResetArray(l->A);
1165:   return 0;
1166: }

1168: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1169: {
1170:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1174:   MatDenseCUDAReplaceArray(l->A, a);
1175:   return 0;
1176: }

1178: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1179: {
1180:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1182:   MatDenseCUDAGetArrayWrite(l->A, a);
1183:   return 0;
1184: }

1186: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1187: {
1188:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1190:   MatDenseCUDARestoreArrayWrite(l->A, a);
1191:   return 0;
1192: }

1194: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1195: {
1196:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1198:   MatDenseCUDAGetArrayRead(l->A, a);
1199:   return 0;
1200: }

1202: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1203: {
1204:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1206:   MatDenseCUDARestoreArrayRead(l->A, a);
1207:   return 0;
1208: }

1210: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1211: {
1212:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1214:   MatDenseCUDAGetArray(l->A, a);
1215:   return 0;
1216: }

1218: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1219: {
1220:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1222:   MatDenseCUDARestoreArray(l->A, a);
1223:   return 0;
1224: }

1226: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1227: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1228: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1229: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1230: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1231: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1232: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

/*
  MatBindToCPU_MPIDenseCUDA - Toggles whether operations run on the host or the GPU.

  Rebinding swaps the composed column-vector access methods (and the shift
  implementation) between the CUDA and host variants, and switches the default
  random-number generator accordingly.  Cached helper objects of the wrong
  type are destroyed so they are recreated lazily with the correct type.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  /* propagate the binding to the local sequential block first */
  if (d->A) MatBindToCPU(d->A, bind);
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype);
    /* drop cached column vector/matrix unless they are already CUDA objects */
    PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda);
    if (!iscuda) VecDestroy(&d->cvec);
    PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda);
    if (!iscuda) MatDestroy(&d->cmat);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA);
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    /* bound to CPU: restore the host implementations */
    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
    mat->ops->shift = MatShift_MPIDense;
  }
  /* the cached conversion matrix, if any, follows the binding */
  if (d->cmat) MatBindToCPU(d->cmat, bind);
  return 0;
}

1273: PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1274: {
1275:   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1276:   PetscBool     iscuda;

1279:   PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
1280:   if (!iscuda) return 0;
1281:   PetscLayoutSetUp(A->rmap);
1282:   PetscLayoutSetUp(A->cmap);
1283:   if (!d->A) {
1284:     MatCreate(PETSC_COMM_SELF, &d->A);
1285:     MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
1286:   }
1287:   MatSetType(d->A, MATSEQDENSECUDA);
1288:   MatSeqDenseCUDASetPreallocation(d->A, d_data);
1289:   A->preallocated = PETSC_TRUE;
1290:   A->assembled    = PETSC_TRUE;
1291:   return 0;
1292: }
1293: #endif

1295: #if defined(PETSC_HAVE_HIP)
1296: PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1297: {
1298:   PetscScalar *da;
1299:   PetscInt     lda;

1301:   MatDenseHIPGetArray(A, &da);
1302:   MatDenseGetLDA(A, &lda);
1303:   PetscInfo(A, "Performing Shift on backend\n");
1304:   MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N);
1305:   MatDenseHIPRestoreArray(A, &da);
1306:   return 0;
1307: }

1309: static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1310: {
1311:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1312:   PetscInt      lda;

1316:   if (!a->cvec) {
1317:     VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1318:     PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec);
1319:   }
1320:   a->vecinuse = col + 1;
1321:   MatDenseGetLDA(a->A, &lda);
1322:   MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse);
1323:   VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1324:   *v = a->cvec;
1325:   return 0;
1326: }

1328: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1329: {
1330:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1334:   a->vecinuse = 0;
1335:   MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
1336:   VecHIPResetArray(a->cvec);
1337:   if (v) *v = NULL;
1338:   return 0;
1339: }

1341: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1342: {
1343:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1344:   PetscInt      lda;

1348:   if (!a->cvec) {
1349:     VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1350:     PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec);
1351:   }
1352:   a->vecinuse = col + 1;
1353:   MatDenseGetLDA(a->A, &lda);
1354:   MatDenseHIPGetArrayRead(a->A, &a->ptrinuse);
1355:   VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1356:   VecLockReadPush(a->cvec);
1357:   *v = a->cvec;
1358:   return 0;
1359: }

1361: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1362: {
1363:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1367:   a->vecinuse = 0;
1368:   MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse);
1369:   VecLockReadPop(a->cvec);
1370:   VecHIPResetArray(a->cvec);
1371:   if (v) *v = NULL;
1372:   return 0;
1373: }

1375: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1376: {
1377:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1378:   PetscInt      lda;

1382:   if (!a->cvec) {
1383:     VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1384:     PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec);
1385:   }
1386:   a->vecinuse = col + 1;
1387:   MatDenseGetLDA(a->A, &lda);
1388:   MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1389:   VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1390:   *v = a->cvec;
1391:   return 0;
1392: }

1394: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1395: {
1396:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1400:   a->vecinuse = 0;
1401:   MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1402:   VecHIPResetArray(a->cvec);
1403:   if (v) *v = NULL;
1404:   return 0;
1405: }

1407: static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1408: {
1409:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1413:   MatDenseHIPPlaceArray(l->A, a);
1414:   return 0;
1415: }

1417: static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1418: {
1419:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1423:   MatDenseHIPResetArray(l->A);
1424:   return 0;
1425: }

1427: static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1428: {
1429:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1433:   MatDenseHIPReplaceArray(l->A, a);
1434:   return 0;
1435: }

1437: static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1438: {
1439:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1441:   MatDenseHIPGetArrayWrite(l->A, a);
1442:   return 0;
1443: }

1445: static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1446: {
1447:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1449:   MatDenseHIPRestoreArrayWrite(l->A, a);
1450:   return 0;
1451: }

1453: static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1454: {
1455:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1457:   MatDenseHIPGetArrayRead(l->A, a);
1458:   return 0;
1459: }

1461: static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1462: {
1463:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1465:   MatDenseHIPRestoreArrayRead(l->A, a);
1466:   return 0;
1467: }

1469: static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1470: {
1471:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1473:   MatDenseHIPGetArray(l->A, a);
1474:   return 0;
1475: }

1477: static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1478: {
1479:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1481:   MatDenseHIPRestoreArray(l->A, a);
1482:   return 0;
1483: }

1485: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1486: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1487: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1488: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1489: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1490: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1491: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

/*
  MatBindToCPU_MPIDenseHIP - Toggles whether operations run on the host or the GPU.

  Rebinding swaps the composed column-vector access methods (and the shift
  implementation) between the HIP and host variants, and switches the default
  random-number generator accordingly.  Cached helper objects of the wrong
  type are destroyed so they are recreated lazily with the correct type.
*/
static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  /* propagate the binding to the local sequential block first */
  if (d->A) MatBindToCPU(d->A, bind);
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda; /* NOTE(review): misnomer inherited from the CUDA variant; this tests HIP types */

    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype);
    /* drop cached column vector/matrix unless they are already HIP objects */
    PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iscuda);
    if (!iscuda) VecDestroy(&d->cvec);
    PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iscuda);
    if (!iscuda) MatDestroy(&d->cmat);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP);
    mat->ops->shift = MatShift_MPIDenseHIP;
  } else {
    /* bound to CPU: restore the host implementations */
    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
    mat->ops->shift = MatShift_MPIDense;
  }
  /* the cached conversion matrix, if any, follows the binding */
  if (d->cmat) MatBindToCPU(d->cmat, bind);
  return 0;
}

1532: PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1533: {
1534:   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1535:   PetscBool     iscuda;

1538:   PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iscuda);
1539:   if (!iscuda) return 0;
1540:   PetscLayoutSetUp(A->rmap);
1541:   PetscLayoutSetUp(A->cmap);
1542:   if (!d->A) {
1543:     MatCreate(PETSC_COMM_SELF, &d->A);
1544:     PetscLogObjectParent((PetscObject)A, (PetscObject)d->A);
1545:     MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
1546:   }
1547:   MatSetType(d->A, MATSEQDENSEHIP);
1548:   MatSeqDenseHIPSetPreallocation(d->A, d_data);
1549:   A->preallocated = PETSC_TRUE;
1550:   A->assembled    = PETSC_TRUE;
1551:   return 0;
1552: }
1553: #endif

1555: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1556: {
1557:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1559:   MatSetRandom(d->A, rctx);
1560: #if defined(PETSC_HAVE_DEVICE)
1561:   x->offloadmask = d->A->offloadmask;
1562: #endif
1563:   return 0;
1564: }

/* A dense matrix stores every entry explicitly, so no diagonal entry can be
   structurally missing; the optional index output d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  *missing = PETSC_FALSE;
  return 0;
}

1572: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1573: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1574: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1575: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1576: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1577: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

1579: /* -------------------------------------------------------------------*/
1580: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1581:                                        MatGetRow_MPIDense,
1582:                                        MatRestoreRow_MPIDense,
1583:                                        MatMult_MPIDense,
1584:                                        /*  4*/ MatMultAdd_MPIDense,
1585:                                        MatMultTranspose_MPIDense,
1586:                                        MatMultTransposeAdd_MPIDense,
1587:                                        NULL,
1588:                                        NULL,
1589:                                        NULL,
1590:                                        /* 10*/ NULL,
1591:                                        NULL,
1592:                                        NULL,
1593:                                        NULL,
1594:                                        MatTranspose_MPIDense,
1595:                                        /* 15*/ MatGetInfo_MPIDense,
1596:                                        MatEqual_MPIDense,
1597:                                        MatGetDiagonal_MPIDense,
1598:                                        MatDiagonalScale_MPIDense,
1599:                                        MatNorm_MPIDense,
1600:                                        /* 20*/ MatAssemblyBegin_MPIDense,
1601:                                        MatAssemblyEnd_MPIDense,
1602:                                        MatSetOption_MPIDense,
1603:                                        MatZeroEntries_MPIDense,
1604:                                        /* 24*/ MatZeroRows_MPIDense,
1605:                                        NULL,
1606:                                        NULL,
1607:                                        NULL,
1608:                                        NULL,
1609:                                        /* 29*/ MatSetUp_MPIDense,
1610:                                        NULL,
1611:                                        NULL,
1612:                                        MatGetDiagonalBlock_MPIDense,
1613:                                        NULL,
1614:                                        /* 34*/ MatDuplicate_MPIDense,
1615:                                        NULL,
1616:                                        NULL,
1617:                                        NULL,
1618:                                        NULL,
1619:                                        /* 39*/ MatAXPY_MPIDense,
1620:                                        MatCreateSubMatrices_MPIDense,
1621:                                        NULL,
1622:                                        MatGetValues_MPIDense,
1623:                                        MatCopy_MPIDense,
1624:                                        /* 44*/ NULL,
1625:                                        MatScale_MPIDense,
1626:                                        MatShift_MPIDense,
1627:                                        NULL,
1628:                                        NULL,
1629:                                        /* 49*/ MatSetRandom_MPIDense,
1630:                                        NULL,
1631:                                        NULL,
1632:                                        NULL,
1633:                                        NULL,
1634:                                        /* 54*/ NULL,
1635:                                        NULL,
1636:                                        NULL,
1637:                                        NULL,
1638:                                        NULL,
1639:                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1640:                                        MatDestroy_MPIDense,
1641:                                        MatView_MPIDense,
1642:                                        NULL,
1643:                                        NULL,
1644:                                        /* 64*/ NULL,
1645:                                        NULL,
1646:                                        NULL,
1647:                                        NULL,
1648:                                        NULL,
1649:                                        /* 69*/ NULL,
1650:                                        NULL,
1651:                                        NULL,
1652:                                        NULL,
1653:                                        NULL,
1654:                                        /* 74*/ NULL,
1655:                                        NULL,
1656:                                        NULL,
1657:                                        NULL,
1658:                                        NULL,
1659:                                        /* 79*/ NULL,
1660:                                        NULL,
1661:                                        NULL,
1662:                                        NULL,
1663:                                        /* 83*/ MatLoad_MPIDense,
1664:                                        NULL,
1665:                                        NULL,
1666:                                        NULL,
1667:                                        NULL,
1668:                                        NULL,
1669:                                        /* 89*/ NULL,
1670:                                        NULL,
1671:                                        NULL,
1672:                                        NULL,
1673:                                        NULL,
1674:                                        /* 94*/ NULL,
1675:                                        NULL,
1676:                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1677:                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1678:                                        NULL,
1679:                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1680:                                        NULL,
1681:                                        NULL,
1682:                                        MatConjugate_MPIDense,
1683:                                        NULL,
1684:                                        /*104*/ NULL,
1685:                                        MatRealPart_MPIDense,
1686:                                        MatImaginaryPart_MPIDense,
1687:                                        NULL,
1688:                                        NULL,
1689:                                        /*109*/ NULL,
1690:                                        NULL,
1691:                                        NULL,
1692:                                        MatGetColumnVector_MPIDense,
1693:                                        MatMissingDiagonal_MPIDense,
1694:                                        /*114*/ NULL,
1695:                                        NULL,
1696:                                        NULL,
1697:                                        NULL,
1698:                                        NULL,
1699:                                        /*119*/ NULL,
1700:                                        NULL,
1701:                                        NULL,
1702:                                        NULL,
1703:                                        NULL,
1704:                                        /*124*/ NULL,
1705:                                        MatGetColumnReductions_MPIDense,
1706:                                        NULL,
1707:                                        NULL,
1708:                                        NULL,
1709:                                        /*129*/ NULL,
1710:                                        NULL,
1711:                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1712:                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1713:                                        NULL,
1714:                                        /*134*/ NULL,
1715:                                        NULL,
1716:                                        NULL,
1717:                                        NULL,
1718:                                        NULL,
1719:                                        /*139*/ NULL,
1720:                                        NULL,
1721:                                        NULL,
1722:                                        NULL,
1723:                                        NULL,
1724:                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1725:                                        /*145*/ NULL,
1726:                                        NULL,
1727:                                        NULL,
1728:                                        NULL,
1729:                                        NULL,
1730:                                        /*150*/ NULL};

/* Implements MatMPIDenseSetPreallocation() for MATMPIDENSE.
   Sets up the parallel row/column layouts and lazily creates the inner
   sequential dense matrix a->A that stores this rank's rmap->n rows of all
   cmap->N columns; `data` (may be NULL) is forwarded to the sequential
   preallocation so a user-supplied array can back the storage. */
1732: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1733: {
1734:   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1735:   MatType       mtype = MATSEQDENSE;

1738:   PetscLayoutSetUp(mat->rmap);
1739:   PetscLayoutSetUp(mat->cmap);
1740:   if (!a->A) {
1741:     MatCreate(PETSC_COMM_SELF, &a->A);
      /* local piece is rmap->n x cmap->N: each rank holds its full rows */
1742:     MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N);
1743:   }
1744: #if defined(PETSC_HAVE_CUDA)
1745:   PetscBool iscuda;
      /* choose the matching sequential GPU type so a->A lives where mat does */
1746:   PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda);
1747:   if (iscuda) mtype = MATSEQDENSECUDA;
1748: #endif
1749: #if defined(PETSC_HAVE_HIP)
1750:   PetscBool iship;
1751:   PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship);
1752:   if (iship) mtype = MATSEQDENSEHIP;
1753: #endif
1754:   MatSetType(a->A, mtype);
1755:   MatSeqDenseSetPreallocation(a->A, data);
1756: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
      /* mirror the inner matrix's host/device residency on the outer matrix */
1757:   mat->offloadmask = a->A->offloadmask;
1758: #endif
1759:   mat->preallocated = PETSC_TRUE;
1760:   mat->assembled    = PETSC_TRUE;
1761:   return 0;
1762: }

/* Converts a MATMPIAIJ matrix to MATMPIDENSE: densifies the merged local
   (diagonal + off-diagonal) sequential AIJ piece, then concatenates the
   per-rank dense pieces into one parallel dense matrix.  Handles
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX. */
1764: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1765: {
1766:   Mat B, C;

1768:   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C);
1769:   MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B);
1770:   MatDestroy(&C);
1771:   if (reuse == MAT_REUSE_MATRIX) {
      /* reuse the caller-provided destination matrix */
1772:     C = *newmat;
1773:   } else C = NULL;
1774:   MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
1775:   MatDestroy(&B);
1776:   if (reuse == MAT_INPLACE_MATRIX) {
      /* replace A's internals with the dense result, preserving the header */
1777:     MatHeaderReplace(A, &C);
1778:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1779:   return 0;
1780: }

/* Converts a MATMPIDENSE matrix to MATMPIAIJ: sparsifies the local
   sequential dense block, then concatenates the per-rank AIJ pieces into
   one parallel AIJ matrix.  Handles MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX
   and MAT_INPLACE_MATRIX. */
1782: PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1783: {
1784:   Mat B, C;

1786:   MatDenseGetLocalMatrix(A, &C);
1787:   MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
1788:   if (reuse == MAT_REUSE_MATRIX) {
1789:     C = *newmat;
1790:   } else C = NULL;
1791:   MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
1792:   MatDestroy(&B);
1793:   if (reuse == MAT_INPLACE_MATRIX) {
1794:     MatHeaderReplace(A, &C);
1795:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1796:   return 0;
1797: }

1799: #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting every locally
   owned row (all N columns) into a new or reused Elemental matrix via
   MatSetValues().  Column-oriented insertion is used to match Elemental's
   internal layout. */
1800: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1801: {
1802:   Mat          mat_elemental;
1803:   PetscScalar *v;
1804:   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

1806:   if (reuse == MAT_REUSE_MATRIX) {
1807:     mat_elemental = *newmat;
      /* reused destination must be cleared because values are ADDed below */
1808:     MatZeroEntries(*newmat);
1809:   } else {
1810:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1811:     MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
1812:     MatSetType(mat_elemental, MATELEMENTAL);
1813:     MatSetUp(mat_elemental);
1814:     MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE);
1815:   }

      /* index arrays: all global columns, and this rank's global row range */
1817:   PetscMalloc2(m, &rows, N, &cols);
1818:   for (i = 0; i < N; i++) cols[i] = i;
1819:   for (i = 0; i < m; i++) rows[i] = rstart + i;

1821:   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1822:   MatDenseGetArray(A, &v);
1823:   MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES);
1824:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1825:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1826:   MatDenseRestoreArray(A, &v);
1827:   PetscFree2(rows, cols);

1829:   if (reuse == MAT_INPLACE_MATRIX) {
1830:     MatHeaderReplace(A, &mat_elemental);
1831:   } else {
1832:     *newmat = mat_elemental;
1833:   }
1834:   return 0;
1835: }
1836: #endif

/* Implements MatDenseGetColumn() for MATMPIDENSE: delegates to the inner
   sequential matrix, which is valid because each rank stores complete
   columns of its local row block. */
1838: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1839: {
1840:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1842:   MatDenseGetColumn(mat->A, col, vals);
1843:   return 0;
1844: }

/* Implements MatDenseRestoreColumn() for MATMPIDENSE: releases the column
   pointer previously obtained from the inner sequential matrix. */
1846: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1847: {
1848:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1850:   MatDenseRestoreColumn(mat->A, vals);
1851:   return 0;
1852: }

/* Builds (or refills) a parallel dense matrix from one sequential dense
   matrix per rank: each rank contributes `m` rows; `n` is this rank's share
   of the columns (PETSC_DECIDE allowed).  MAT_INITIAL_MATRIX creates the
   parallel matrix (symbolic phase); both paths then copy the data in. */
1854: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1855: {
1856:   Mat_MPIDense *mat;
      /* NOTE(review): nloc is fetched but unused in the visible code; a
         validation against it likely follows upstream -- confirm */
1857:   PetscInt      m, nloc, N;

1859:   MatGetSize(inmat, &m, &N);
1860:   MatGetLocalSize(inmat, NULL, &nloc);
1861:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1862:     PetscInt sum;

1864:     if (n == PETSC_DECIDE) PetscSplitOwnership(comm, &n, &N);
1865:     /* Check sum(n) = N */
1866:     MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm);
      /* NOTE(review): `sum` is not checked in the visible code; presumably a
         PetscCheck(sum == N, ...) was stripped by the rendering -- confirm */

1869:     MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat);
      /* no communication needed during assembly of the concatenated matrix */
1870:     MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
1871:   }

1873:   /* numeric phase */
1874:   mat = (Mat_MPIDense *)(*outmat)->data;
1875:   MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN);
1876:   return 0;
1877: }

1879: #if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to host MATMPIDENSE: binds to the
   CPU, restores the standard vector type, removes all CUDA-specific composed
   methods, and converts the inner sequential matrix back to MATSEQDENSE. */
1880: PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1881: {
1882:   Mat           B;
1883:   Mat_MPIDense *m;

1885:   if (reuse == MAT_INITIAL_MATRIX) {
1886:     MatDuplicate(M, MAT_COPY_VALUES, newmat);
1887:   } else if (reuse == MAT_REUSE_MATRIX) {
1888:     MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
1889:   }

      /* for MAT_INPLACE_MATRIX, *newmat is M itself */
1891:   B = *newmat;
1892:   MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE);
1893:   PetscFree(B->defaultvectype);
1894:   PetscStrallocpy(VECSTANDARD, &B->defaultvectype);
1895:   PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE);
      /* unregister every CUDA-only composed method (NULL removes it) */
1896:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL);
1897:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
1898:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
1899:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
1900:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
1901:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL);
1902:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL);
1903:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL);
1904:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL);
1905:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL);
1906:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL);
1907:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL);
1908:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL);
1909:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL);
1910:   m = (Mat_MPIDense *)(B)->data;
1911:   if (m->A) MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A);
1912:   B->ops->bindtocpu = NULL;
1913:   B->offloadmask    = PETSC_OFFLOAD_CPU;
1914:   return 0;
1915: }

/* Converts a host MATMPIDENSE matrix to MATMPIDENSECUDA: switches the
   default vector type to CUDA, registers the CUDA-specific composed methods,
   converts the inner sequential matrix to MATSEQDENSECUDA, and unbinds from
   the CPU. */
1917: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1918: {
1919:   Mat           B;
1920:   Mat_MPIDense *m;

1922:   if (reuse == MAT_INITIAL_MATRIX) {
1923:     MatDuplicate(M, MAT_COPY_VALUES, newmat);
1924:   } else if (reuse == MAT_REUSE_MATRIX) {
1925:     MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
1926:   }

1928:   B = *newmat;
1929:   PetscFree(B->defaultvectype);
1930:   PetscStrallocpy(VECCUDA, &B->defaultvectype);
1931:   PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA);
      /* register the CUDA-specific implementations of the dense interface */
1932:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);
1933:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1934:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1935:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1936:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1937:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);
1938:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);
1939:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);
1940:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);
1941:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);
1942:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);
1943:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);
1944:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);
1945:   PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);
1946:   m = (Mat_MPIDense *)(B->data);
1947:   if (m->A) {
1948:     MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A);
      /* values valid on host and device right after the conversion */
1949:     B->offloadmask = PETSC_OFFLOAD_BOTH;
1950:   } else {
1951:     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1952:   }
1953:   MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE);

1955:   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1956:   return 0;
1957: }
1958: #endif

1960: #if defined(PETSC_HAVE_HIP)
/* Converts a MATMPIDENSEHIP matrix back to host MATMPIDENSE: binds to the
   CPU, restores the standard vector type, removes all HIP-specific composed
   methods, and converts the inner sequential matrix back to MATSEQDENSE.
   (HIP twin of MatConvert_MPIDenseCUDA_MPIDense.) */
1961: PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1962: {
1963:   Mat           B;
1964:   Mat_MPIDense *m;

1966:   if (reuse == MAT_INITIAL_MATRIX) {
1967:     MatDuplicate(M, MAT_COPY_VALUES, newmat);
1968:   } else if (reuse == MAT_REUSE_MATRIX) {
1969:     MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
1970:   }

1972:   B = *newmat;
1973:   MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE);
1974:   PetscFree(B->defaultvectype);
1975:   PetscStrallocpy(VECSTANDARD, &B->defaultvectype);
1976:   PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE);
      /* unregister every HIP-only composed method (NULL removes it) */
1977:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL);
1978:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL);
1979:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL);
1980:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL);
1981:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL);
1982:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL);
1983:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL);
1984:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL);
1985:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL);
1986:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL);
1987:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL);
1988:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL);
1989:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL);
1990:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL);
1991:   m = (Mat_MPIDense *)(B)->data;
1992:   if (m->A) MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A);
1993:   B->ops->bindtocpu = NULL;
1994:   B->offloadmask    = PETSC_OFFLOAD_CPU;
1995:   return 0;
1996: }

/* Converts a host MATMPIDENSE matrix to MATMPIDENSEHIP: switches the default
   vector type to HIP, registers the HIP-specific composed methods, converts
   the inner sequential matrix to MATSEQDENSEHIP, and unbinds from the CPU.
   (HIP twin of MatConvert_MPIDense_MPIDenseCUDA.) */
1998: PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1999: {
2000:   Mat           B;
2001:   Mat_MPIDense *m;

2003:   if (reuse == MAT_INITIAL_MATRIX) {
2004:     MatDuplicate(M, MAT_COPY_VALUES, newmat);
2005:   } else if (reuse == MAT_REUSE_MATRIX) {
2006:     MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
2007:   }

2009:   B = *newmat;
2010:   PetscFree(B->defaultvectype);
2011:   PetscStrallocpy(VECHIP, &B->defaultvectype);
2012:   PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP);
      /* register the HIP-specific implementations of the dense interface */
2013:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense);
2014:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
2015:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
2016:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
2017:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
2018:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP);
2019:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP);
2020:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP);
2021:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP);
2022:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP);
2023:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP);
2024:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP);
2025:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP);
2026:   PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP);
2027:   m = (Mat_MPIDense *)(B->data);
2028:   if (m->A) {
2029:     MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A);
      /* values valid on host and device right after the conversion */
2030:     B->offloadmask = PETSC_OFFLOAD_BOTH;
2031:   } else {
2032:     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2033:   }
2034:   MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE);

2036:   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2037:   return 0;
2038: }
2039: #endif

/* Implements MatDenseGetColumnVec() for MATMPIDENSE: exposes column `col`
   of the local storage as a parallel Vec without copying.  The cached
   vector a->cvec is created lazily with no backing array; VecPlaceArray()
   points it at the column.  Only one column may be in use at a time
   (tracked via a->vecinuse, stored as col+1 so 0 means "none"). */
2041: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2042: {
2043:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2044:   PetscInt      lda;

2048:   if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
2049:   a->vecinuse = col + 1;
2050:   MatDenseGetLDA(a->A, &lda);
2051:   MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse);
      /* size_t arithmetic avoids PetscInt overflow for col*lda */
2052:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
2053:   *v = a->cvec;
2054:   return 0;
2055: }

/* Implements MatDenseRestoreColumnVec() for MATMPIDENSE: releases the
   column exposed by MatDenseGetColumnVec_MPIDense and clears the in-use
   marker; the caller's Vec pointer is NULLed to prevent reuse. */
2057: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2058: {
2059:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2063:   a->vecinuse = 0;
2064:   MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
2065:   VecResetArray(a->cvec);
2066:   if (v) *v = NULL;
2067:   return 0;
2068: }

/* Read-only variant of MatDenseGetColumnVec_MPIDense: the column vector is
   additionally read-locked so callers cannot modify it. */
2070: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2071: {
2072:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2073:   PetscInt      lda;

2077:   if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
2078:   a->vecinuse = col + 1;
2079:   MatDenseGetLDA(a->A, &lda);
2080:   MatDenseGetArrayRead(a->A, &a->ptrinuse);
2081:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
2082:   VecLockReadPush(a->cvec);
2083:   *v = a->cvec;
2084:   return 0;
2085: }

/* Releases a column obtained with MatDenseGetColumnVecRead_MPIDense:
   restores the read-only array, pops the read lock, and resets the
   cached column vector. */
2087: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2088: {
2089:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2093:   a->vecinuse = 0;
2094:   MatDenseRestoreArrayRead(a->A, &a->ptrinuse);
2095:   VecLockReadPop(a->cvec);
2096:   VecResetArray(a->cvec);
2097:   if (v) *v = NULL;
2098:   return 0;
2099: }

/* Write-only variant of MatDenseGetColumnVec_MPIDense: uses
   MatDenseGetArrayWrite(), so existing column values need not be valid
   (e.g. no host/device transfer is required before overwriting). */
2101: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2102: {
2103:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2104:   PetscInt      lda;

2108:   if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
2109:   a->vecinuse = col + 1;
2110:   MatDenseGetLDA(a->A, &lda);
2111:   MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
2112:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
2113:   *v = a->cvec;
2114:   return 0;
2115: }

/* Releases a column obtained with MatDenseGetColumnVecWrite_MPIDense and
   clears the in-use marker. */
2117: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2118: {
2119:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2123:   a->vecinuse = 0;
2124:   MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
2125:   VecResetArray(a->cvec);
2126:   if (v) *v = NULL;
2127:   return 0;
2128: }

/* Implements MatDenseGetSubMatrix() for MATMPIDENSE: exposes the global
   submatrix rows [rbegin,rend) x columns [cbegin,cend) as a view (no copy),
   cached in a->cmat.  The cached matrix's layouts are rebuilt only when the
   requested shape differs from the previous request.  Only one submatrix
   may be in use at a time (tracked via a->matinuse). */
2130: PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2131: {
2132:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2133:   Mat_MPIDense *c;
2134:   MPI_Comm      comm;
2135:   PetscInt      pbegin, pend;

2137:   PetscObjectGetComm((PetscObject)A, &comm);
      /* clamp the global row range to this rank's ownership range */
2140:   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2141:   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2142:   if (!a->cmat) {
2143:     MatCreate(comm, &a->cmat);
2144:     MatSetType(a->cmat, ((PetscObject)A)->type_name);
      /* full row range: share A's row layout instead of building a new one */
2145:     if (rend - rbegin == A->rmap->N) PetscLayoutReference(A->rmap, &a->cmat->rmap);
2146:     else {
2147:       PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
2148:       PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
2149:       PetscLayoutSetUp(a->cmat->rmap);
2150:     }
2151:     PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
2152:     PetscLayoutSetUp(a->cmat->cmap);
2153:   } else {
      /* reuse path: rebuild layouts only if the requested shape changed */
2154:     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2155:     if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global size but a custom layout: local sizes must agree on all ranks */
2156:       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2157:       MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
2158:     }
2159:     if (!same) {
2160:       PetscLayoutDestroy(&a->cmat->rmap);
2161:       PetscLayoutCreate(comm, &a->cmat->rmap);
2162:       PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
2163:       PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
2164:       PetscLayoutSetUp(a->cmat->rmap);
2165:     }
2166:     if (cend - cbegin != a->cmat->cmap->N) {
2167:       PetscLayoutDestroy(&a->cmat->cmap);
2168:       PetscLayoutCreate(comm, &a->cmat->cmap);
2169:       PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
2170:       PetscLayoutSetUp(a->cmat->cmap);
2171:     }
2172:   }
2173:   c = (Mat_MPIDense *)a->cmat->data;
      /* take the matching view of the inner sequential matrix */
2175:   MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A);

2177:   a->cmat->preallocated = PETSC_TRUE;
2178:   a->cmat->assembled    = PETSC_TRUE;
2179: #if defined(PETSC_HAVE_DEVICE)
2180:   a->cmat->offloadmask = c->A->offloadmask;
2181: #endif
      /* stored as cbegin+1 so 0 means "no submatrix outstanding" */
2182:   a->matinuse = cbegin + 1;
2183:   *v          = a->cmat;
2184:   return 0;
2185: }

/* Implements MatDenseRestoreSubMatrix() for MATMPIDENSE: returns the inner
   sequential view taken by MatDenseGetSubMatrix_MPIDense, clears the in-use
   marker, and NULLs the caller's pointer. */
2187: PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2188: {
2189:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2190:   Mat_MPIDense *c;

2195:   a->matinuse = 0;
2196:   c           = (Mat_MPIDense *)a->cmat->data;
2197:   MatDenseRestoreSubMatrix(a->A, &c->A);
2198:   if (v) *v = NULL;
2199: #if defined(PETSC_HAVE_DEVICE)
      /* the submatrix may have moved data; resync A's residency flag */
2200:   A->offloadmask = a->A->offloadmask;
2201: #endif
2202:   return 0;
2203: }

2205: /*MC
2206:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

2208:    Options Database Keys:
2209: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

2211:   Level: beginner

2213: .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2214: M*/
/* Type constructor for MATMPIDENSE: allocates the implementation struct,
   installs the function table, creates the stash used for off-process
   MatSetValues(), and composes all type-specific methods (dense array
   access, column vectors, submatrices, conversions, products).  The inner
   sequential matrix is created later by preallocation. */
2215: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2216: {
2217:   Mat_MPIDense *a;

2219:   PetscNew(&a);
2220:   mat->data = (void *)a;
2221:   PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps));

2223:   mat->insertmode = NOT_SET_VALUES;

2225:   /* build cache for off array entries formed */
2226:   a->donotstash = PETSC_FALSE;

2228:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash);

2230:   /* stuff used for matrix vector multiply */
2231:   a->lvec        = NULL;
2232:   a->Mvctx       = NULL;
2233:   a->roworiented = PETSC_TRUE;

      /* compose the MATMPIDENSE implementations of the generic dense API */
2235:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense);
2236:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense);
2237:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense);
2238:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense);
2239:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense);
2240:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense);
2241:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense);
2242:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense);
2243:   PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense);
2244:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense);
2245:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense);
2246:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
2247:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
2248:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
2249:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
2250:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
2251:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
2252:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense);
2253:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense);
2254:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense);
2255:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ);
2256: #if defined(PETSC_HAVE_ELEMENTAL)
2257:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental);
2258: #endif
2259: #if defined(PETSC_HAVE_SCALAPACK)
2260:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK);
2261: #endif
2262: #if defined(PETSC_HAVE_CUDA)
2263:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA);
2264: #endif
2265:   PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense);
2266:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
2267:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
2268: #if defined(PETSC_HAVE_CUDA)
2269:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
2270:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
2271: #endif
2272: #if defined(PETSC_HAVE_HIP)
2273:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP);
2274:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
2275:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
2276: #endif
2277:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense);
2278:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense);
2279:   PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE);
2280:   return 0;
2281: }

2283: /*MC
2284:    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

2286:    Options Database Keys:
2287: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

2289:   Level: beginner

2291: .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2292: M*/
2293: #if defined(PETSC_HAVE_CUDA)
2294: #include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: initializes the CUDA device, builds
   a plain MATMPIDENSE, then converts it in place to the CUDA type. */
2295: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2296: {
2297:   PetscDeviceInitialize(PETSC_DEVICE_CUDA);
2298:   MatCreate_MPIDense(B);
2299:   MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B);
2300:   return 0;
2301: }
2302: #endif

2304: /*MC
2305:    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.

2307:    Options Database Keys:
2308: . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`

2310:   Level: beginner

2312: .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSEHIP`, `MATSEQDENSECUDA`, `MATMPIDENSEHIP`
2313: M*/
2314: #if defined(PETSC_HAVE_HIP)
2315: #include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSEHIP: initializes the HIP device, builds
   a plain MATMPIDENSE, then converts it in place to the HIP type. */
2316: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2317: {
2318:   PetscDeviceInitialize(PETSC_DEVICE_HIP);
2319:   MatCreate_MPIDense(B);
2320:   MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B);
2321:   return 0;
2322: }
2323: #endif

2325: /*MC
2326:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

2328:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2329:    and `MATMPIDENSE` otherwise.

2331:    Options Database Keys:
2332: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

2334:   Level: beginner

2336: .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2337: M*/

2339: /*MC
2340:    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on CUDA GPUs;
2341:    its HIP counterpart is MATDENSEHIP = "densehip" (documented separately below).

2344:    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2345:    and `MATMPIDENSECUDA` otherwise.

2347:    Options Database Keys:
2348: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

2350:   Level: beginner

2352: .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2353: M*/

2355: /*MC
2356:    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.

2358:    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2359:    and `MATMPIDENSEHIP` otherwise.

2361:    Options Database Keys:
2362: . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`

2364:   Level: beginner

2366: .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2367: M*/

2369: /*@C
2370:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

2372:    Collective

2374:    Input Parameters:
2375: +  B - the matrix
2376: -  data - optional location of matrix data.  Set data=NULL for PETSc
2377:    to control all matrix memory allocation.

2379:    Notes:
2380:    The dense format is fully compatible with standard Fortran 77
2381:    storage by columns.

2383:    The data input variable is intended primarily for Fortran programmers
2384:    who wish to allocate their own matrix memory space.  Most users should
2385:    set data=NULL.

2387:    Level: intermediate

2389: .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2390: @*/
2391: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2392: {
2394:   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2395:   return 0;
2396: }

2398: /*@
2399:    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2400:    array provided by the user. This is useful to avoid copying an array
2401:    into a matrix

2403:    Not Collective

2405:    Input Parameters:
2406: +  mat - the matrix
2407: -  array - the array in column major order

2409:    Note:
2410:    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2411:    freed when the matrix is destroyed.

2413:    Level: developer

2415: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2416:           `MatDenseReplaceArray()`
2417: @*/
2418: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2419: {
2421:   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2422:   PetscObjectStateIncrease((PetscObject)mat);
2423: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2424:   mat->offloadmask = PETSC_OFFLOAD_CPU;
2425: #endif
2426:   return 0;
2427: }

2429: /*@
2430:    MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to `MatDensePlaceArray()`

2432:    Not Collective

2434:    Input Parameters:
2435: .  mat - the matrix

2437:    Note:
2438:    You can only call this after a call to `MatDensePlaceArray()`

2440:    Level: developer

2442: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2443: @*/
2444: PetscErrorCode MatDenseResetArray(Mat mat)
2445: {
2447:   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2448:   PetscObjectStateIncrease((PetscObject)mat);
2449:   return 0;
2450: }

2452: /*@
2453:    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2454:    array provided by the user. This is useful to avoid copying an array
2455:    into a matrix

2457:    Not Collective

2459:    Input Parameters:
2460: +  mat - the matrix
2461: -  array - the array in column major order

2463:    Note:
2464:    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2465:    freed by the user. It will be freed when the matrix is destroyed.

2467:    Level: developer

2469: .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2470: @*/
2471: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2472: {
2474:   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2475:   PetscObjectStateIncrease((PetscObject)mat);
2476: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2477:   mat->offloadmask = PETSC_OFFLOAD_CPU;
2478: #endif
2479:   return 0;
2480: }

2482: #if defined(PETSC_HAVE_CUDA)
2483: /*@C
2484:    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2485:    array provided by the user. This is useful to avoid copying an array
2486:    into a matrix

2488:    Not Collective

2490:    Input Parameters:
2491: +  mat - the matrix
2492: -  array - the array in column major order

2494:    Note:
2495:    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2496:    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

2498:    Level: developer

2500: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2501: @*/
2502: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2503: {
2505:   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2506:   PetscObjectStateIncrease((PetscObject)mat);
2507:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2508:   return 0;
2509: }

2511: /*@C
2512:    MatDenseCUDAResetArray - Resets the matrix array to the array that it previously had before the call to `MatDenseCUDAPlaceArray()`

2514:    Not Collective

2516:    Input Parameters:
2517: .  mat - the matrix

2519:    Note:
2520:    You can only call this after a call to `MatDenseCUDAPlaceArray()`

2522:    Level: developer

2524: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2525: @*/
2526: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2527: {
2529:   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2530:   PetscObjectStateIncrease((PetscObject)mat);
2531:   return 0;
2532: }

2534: /*@C
2535:    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2536:    array provided by the user. This is useful to avoid copying an array
2537:    into a matrix

2539:    Not Collective

2541:    Input Parameters:
2542: +  mat - the matrix
2543: -  array - the array in column major order

2545:    Note:
2546:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2547:    The memory passed in CANNOT be freed by the user. It will be freed
2548:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2550:    Level: developer

2552: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2553: @*/
2554: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2555: {
2557:   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2558:   PetscObjectStateIncrease((PetscObject)mat);
2559:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2560:   return 0;
2561: }

2563: /*@C
2564:    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

2566:    Not Collective

2568:    Input Parameters:
2569: .  A - the matrix

2571:    Output Parameters
2572: .  array - the GPU array in column major order

2574:    Notes:
2575:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

2577:    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

2579:    Level: developer

2581: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2582: @*/
2583: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2584: {
2586:   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2587:   PetscObjectStateIncrease((PetscObject)A);
2588:   return 0;
2589: }

2591: /*@C
2592:    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

2594:    Not Collective

2596:    Input Parameters:
2597: +  A - the matrix
2598: -  array - the GPU array in column major order

2600:    Level: developer

2602: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2603: @*/
2604: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2605: {
2607:   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2608:   PetscObjectStateIncrease((PetscObject)A);
2609:   A->offloadmask = PETSC_OFFLOAD_GPU;
2610:   return 0;
2611: }

2613: /*@C
2614:    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

2616:    Not Collective

2618:    Input Parameters:
2619: .  A - the matrix

2621:    Output Parameters
2622: .  array - the GPU array in column major order

2624:    Note:
2625:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

2627:    Level: developer

2629: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2630: @*/
2631: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2632: {
2634:   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2635:   return 0;
2636: }

2638: /*@C
2639:    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

2641:    Not Collective

2643:    Input Parameters:
2644: +  A - the matrix
2645: -  array - the GPU array in column major order

2647:    Note:
2648:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

2650:    Level: developer

2652: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2653: @*/
2654: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2655: {
2656:   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2657:   return 0;
2658: }

2660: /*@C
2661:    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

2663:    Not Collective

2665:    Input Parameters:
2666: .  A - the matrix

2668:    Output Parameters
2669: .  array - the GPU array in column major order

2671:    Note:
2672:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

2674:    Level: developer

2676: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2677: @*/
2678: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2679: {
2681:   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2682:   PetscObjectStateIncrease((PetscObject)A);
2683:   return 0;
2684: }

2686: /*@C
2687:    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

2689:    Not Collective

2691:    Input Parameters:
2692: +  A - the matrix
2693: -  array - the GPU array in column major order

2695:    Level: developer

2697: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2698: @*/
2699: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2700: {
2702:   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2703:   PetscObjectStateIncrease((PetscObject)A);
2704:   A->offloadmask = PETSC_OFFLOAD_GPU;
2705:   return 0;
2706: }
2707: #endif

2709: #if defined(PETSC_HAVE_HIP)
2710: /*@C
2711:    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2712:    array provided by the user. This is useful to avoid copying an array
2713:    into a matrix

2715:    Not Collective

2717:    Input Parameters:
2718: +  mat - the matrix
2719: -  array - the array in column major order

2721:    Notes:
2722:    You can return to the original array with a call to MatDenseHIPResetArray(). The user is responsible for freeing this array; it will not be
2723:    freed when the matrix is destroyed. The array must have been allocated with hipMalloc().

2725:    Level: developer

2727: .seealso: MatDenseHIPGetArray(), MatDenseHIPResetArray()
2728: @*/
2729: PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2730: {
2732:   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2733:   PetscObjectStateIncrease((PetscObject)mat);
2734:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2735:   return 0;
2736: }

2738: /*@C
2739:    MatDenseHIPResetArray - Resets the matrix array to that it previously had before the call to MatDenseHIPPlaceArray()

2741:    Not Collective

2743:    Input Parameters:
2744: .  mat - the matrix

2746:    Notes:
2747:    You can only call this after a call to MatDenseHIPPlaceArray()

2749:    Level: developer

2751: .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray()

2753: @*/
2754: PetscErrorCode MatDenseHIPResetArray(Mat mat)
2755: {
2757:   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2758:   PetscObjectStateIncrease((PetscObject)mat);
2759:   return 0;
2760: }

2762: /*@C
2763:    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2764:    array provided by the user. This is useful to avoid copying an array
2765:    into a matrix

2767:    Not Collective

2769:    Input Parameters:
2770: +  mat - the matrix
2771: -  array - the array in column major order

2773:    Notes:
2774:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2775:    The memory passed in CANNOT be freed by the user. It will be freed
2776:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2778:    Level: developer

2780: .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray(), MatDenseHIPResetArray()
2781: @*/
2782: PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2783: {
2785:   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2786:   PetscObjectStateIncrease((PetscObject)mat);
2787:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2788:   return 0;
2789: }

2791: /*@C
2792:    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix.

2794:    Not Collective

2796:    Input Parameters:
2797: .  A - the matrix

2799:    Output Parameters
2800: .  array - the GPU array in column major order

2802:    Notes:
2803:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseHIPGetArray(). The array must be restored with MatDenseHIPRestoreArrayWrite() when no longer needed.

2805:    Level: developer

2807: .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArrayRead()
2808: @*/
2809: PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2810: {
2812:   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2813:   PetscObjectStateIncrease((PetscObject)A);
2814:   return 0;
2815: }

2817: /*@C
2818:    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArrayWrite().

2820:    Not Collective

2822:    Input Parameters:
2823: +  A - the matrix
2824: -  array - the GPU array in column major order

2826:    Notes:

2828:    Level: developer

2830: .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead()
2831: @*/
2832: PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2833: {
2835:   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2836:   PetscObjectStateIncrease((PetscObject)A);
2837:   A->offloadmask = PETSC_OFFLOAD_GPU;
2838:   return 0;
2839: }

2841: /*@C
2842:    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArrayRead() when no longer needed.

2844:    Not Collective

2846:    Input Parameters:
2847: .  A - the matrix

2849:    Output Parameters
2850: .  array - the GPU array in column major order

2852:    Notes:
2853:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite().

2855:    Level: developer

2857: .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead()
2858: @*/
2859: PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2860: {
2862:   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2863:   return 0;
2864: }

2866: /*@C
2867:    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to MatDenseHIPGetArrayRead().

2869:    Not Collective

2871:    Input Parameters:
2872: +  A - the matrix
2873: -  array - the GPU array in column major order

2875:    Notes:
2876:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite().

2878:    Level: developer

2880: .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPGetArrayRead()
2881: @*/
2882: PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
2883: {
2884:   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2885:   return 0;
2886: }

2888: /*@C
2889:    MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArray() when no longer needed.

2891:    Not Collective

2893:    Input Parameters:
2894: .  A - the matrix

2896:    Output Parameters
2897: .  array - the GPU array in column major order

2899:    Notes:
2900:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). For read-only access, use MatDenseHIPGetArrayRead().

2902:    Level: developer

2904: .seealso: MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead()
2905: @*/
2906: PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
2907: {
2909:   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
2910:   PetscObjectStateIncrease((PetscObject)A);
2911:   return 0;
2912: }

2914: /*@C
2915:    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArray().

2917:    Not Collective

2919:    Input Parameters:
2920: +  A - the matrix
2921: -  array - the GPU array in column major order

2923:    Notes:

2925:    Level: developer

2927: .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead()
2928: @*/
2929: PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
2930: {
2932:   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
2933:   PetscObjectStateIncrease((PetscObject)A);
2934:   A->offloadmask = PETSC_OFFLOAD_GPU;
2935:   return 0;
2936: }
2937: #endif

2939: /*@C
2940:    MatCreateDense - Creates a matrix in `MATDENSE` format.

2942:    Collective

2944:    Input Parameters:
2945: +  comm - MPI communicator
2946: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2947: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2948: .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2949: .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2950: -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2951:    to control all matrix memory allocation.

2953:    Output Parameter:
2954: .  A - the matrix

2956:    Notes:
2957:    The dense format is fully compatible with standard Fortran 77
2958:    storage by columns.

2960:    Although local portions of the matrix are stored in column-major
2961:    order, the matrix is partitioned across MPI ranks by row.

2963:    The data input variable is intended primarily for Fortran programmers
2964:    who wish to allocate their own matrix memory space.  Most users should
2965:    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).

2967:    The user MUST specify either the local or global matrix dimensions
2968:    (possibly both).

2970:    Level: intermediate

2972: .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2973: @*/
2974: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
2975: {
2976:   MatCreate(comm, A);
2977:   MatSetSizes(*A, m, n, M, N);
2978:   MatSetType(*A, MATDENSE);
2979:   MatSeqDenseSetPreallocation(*A, data);
2980:   MatMPIDenseSetPreallocation(*A, data);
2981:   return 0;
2982: }

2984: #if defined(PETSC_HAVE_CUDA)
2985: /*@C
2986:    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

2988:    Collective

2990:    Input Parameters:
2991: +  comm - MPI communicator
2992: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2993: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2994: .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2995: .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2996: -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2997:    to control matrix memory allocation.

2999:    Output Parameter:
3000: .  A - the matrix

3002:    Level: intermediate

3004: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3005: @*/
3006: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3007: {
3008:   MatCreate(comm, A);
3010:   MatSetSizes(*A, m, n, M, N);
3011:   MatSetType(*A, MATDENSECUDA);
3012:   MatSeqDenseCUDASetPreallocation(*A, data);
3013:   MatMPIDenseCUDASetPreallocation(*A, data);
3014:   return 0;
3015: }
3016: #endif

3018: #if defined(PETSC_HAVE_HIP)
3019: /*@C
3020:    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.

3022:    Collective

3024:    Input Parameters:
3025: +  comm - MPI communicator
3026: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3027: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3028: .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
3029: .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
3030: -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3031:    to control matrix memory allocation.

3033:    Output Parameter:
3034: .  A - the matrix

3036:    Level: intermediate

3038: .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3039: @*/
3040: PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3041: {
3042:   MatCreate(comm, A);
3044:   MatSetSizes(*A, m, n, M, N);
3045:   MatSetType(*A, MATDENSEHIP);
3046:   MatSeqDenseHIPSetPreallocation(*A, data);
3047:   MatMPIDenseHIPSetPreallocation(*A, data);
3048:   return 0;
3049: }
3050: #endif

3052: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3053: {
3054:   Mat           mat;
3055:   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

3057:   *newmat = NULL;
3058:   MatCreate(PetscObjectComm((PetscObject)A), &mat);
3059:   MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3060:   MatSetType(mat, ((PetscObject)A)->type_name);
3061:   a = (Mat_MPIDense *)mat->data;

3063:   mat->factortype   = A->factortype;
3064:   mat->assembled    = PETSC_TRUE;
3065:   mat->preallocated = PETSC_TRUE;

3067:   mat->insertmode = NOT_SET_VALUES;
3068:   a->donotstash   = oldmat->donotstash;

3070:   PetscLayoutReference(A->rmap, &mat->rmap);
3071:   PetscLayoutReference(A->cmap, &mat->cmap);

3073:   MatDuplicate(oldmat->A, cpvalues, &a->A);

3075:   *newmat = mat;
3076:   return 0;
3077: }

3079: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3080: {
3081:   PetscBool isbinary;
3082: #if defined(PETSC_HAVE_HDF5)
3083:   PetscBool ishdf5;
3084: #endif

3088:   /* force binary viewer to load .info file if it has not yet done so */
3089:   PetscViewerSetUp(viewer);
3090:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
3091: #if defined(PETSC_HAVE_HDF5)
3092:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
3093: #endif
3094:   if (isbinary) {
3095:     MatLoad_Dense_Binary(newMat, viewer);
3096: #if defined(PETSC_HAVE_HDF5)
3097:   } else if (ishdf5) {
3098:     MatLoad_Dense_HDF5(newMat, viewer);
3099: #endif
3100:   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3101:   return 0;
3102: }

3104: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3105: {
3106:   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3107:   Mat           a, b;

3109:   a = matA->A;
3110:   b = matB->A;
3111:   MatEqual(a, b, flag);
3112:   MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
3113:   return 0;
3114: }

3116: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3117: {
3118:   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

3120:   PetscFree2(atb->sendbuf, atb->recvcounts);
3121:   MatDestroy(&atb->atb);
3122:   PetscFree(atb);
3123:   return 0;
3124: }

3126: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3127: {
3128:   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

3130:   PetscFree2(abt->buf[0], abt->buf[1]);
3131:   PetscFree2(abt->recvcounts, abt->recvdispls);
3132:   PetscFree(abt);
3133:   return 0;
3134: }

3136: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3137: {
3138:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3139:   Mat_TransMatMultDense *atb;
3140:   MPI_Comm               comm;
3141:   PetscMPIInt            size, *recvcounts;
3142:   PetscScalar           *carray, *sendbuf;
3143:   const PetscScalar     *atbarray;
3144:   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3145:   const PetscInt        *ranges;

3147:   MatCheckProduct(C, 3);
3149:   atb        = (Mat_TransMatMultDense *)C->product->data;
3150:   recvcounts = atb->recvcounts;
3151:   sendbuf    = atb->sendbuf;

3153:   PetscObjectGetComm((PetscObject)A, &comm);
3154:   MPI_Comm_size(comm, &size);

3156:   /* compute atbarray = aseq^T * bseq */
3157:   MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb);

3159:   MatGetOwnershipRanges(C, &ranges);

3161:   /* arrange atbarray into sendbuf */
3162:   MatDenseGetArrayRead(atb->atb, &atbarray);
3163:   MatDenseGetLDA(atb->atb, &lda);
3164:   for (proc = 0, k = 0; proc < size; proc++) {
3165:     for (j = 0; j < cN; j++) {
3166:       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3167:     }
3168:   }
3169:   MatDenseRestoreArrayRead(atb->atb, &atbarray);

3171:   /* sum all atbarray to local values of C */
3172:   MatDenseGetArrayWrite(c->A, &carray);
3173:   MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm);
3174:   MatDenseRestoreArrayWrite(c->A, &carray);
3175:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
3176:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
3177:   return 0;
3178: }

3180: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3181: {
3182:   MPI_Comm               comm;
3183:   PetscMPIInt            size;
3184:   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3185:   Mat_TransMatMultDense *atb;
3186:   PetscBool              cisdense = PETSC_FALSE;
3187:   PetscInt               i;
3188:   const PetscInt        *ranges;

3190:   MatCheckProduct(C, 4);
3192:   PetscObjectGetComm((PetscObject)A, &comm);
3193:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3194:     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3195:   }

3197:   /* create matrix product C */
3198:   MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N);
3199: #if defined(PETSC_HAVE_CUDA)
3200:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "");
3201: #endif
3202: #if defined(PETSC_HAVE_HIP)
3203:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "");
3204: #endif
3205:   if (!cisdense) MatSetType(C, ((PetscObject)A)->type_name);
3206:   MatSetUp(C);

3208:   /* create data structure for reuse C */
3209:   MPI_Comm_size(comm, &size);
3210:   PetscNew(&atb);
3211:   cM = C->rmap->N;
3212:   PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts);
3213:   MatGetOwnershipRanges(C, &ranges);
3214:   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

3216:   C->product->data    = atb;
3217:   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3218:   return 0;
3219: }

3221: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3222: {
3223:   MPI_Comm               comm;
3224:   PetscMPIInt            i, size;
3225:   PetscInt               maxRows, bufsiz;
3226:   PetscMPIInt            tag;
3227:   PetscInt               alg;
3228:   Mat_MatTransMultDense *abt;
3229:   Mat_Product           *product = C->product;
3230:   PetscBool              flg;

3232:   MatCheckProduct(C, 4);
3234:   /* check local size of A and B */

3237:   PetscStrcmp(product->alg, "allgatherv", &flg);
3238:   alg = flg ? 0 : 1;

3240:   /* setup matrix product C */
3241:   MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N);
3242:   MatSetType(C, MATMPIDENSE);
3243:   MatSetUp(C);
3244:   PetscObjectGetNewTag((PetscObject)C, &tag);

3246:   /* create data structure for reuse C */
3247:   PetscObjectGetComm((PetscObject)C, &comm);
3248:   MPI_Comm_size(comm, &size);
3249:   PetscNew(&abt);
3250:   abt->tag = tag;
3251:   abt->alg = alg;
3252:   switch (alg) {
3253:   case 1: /* alg: "cyclic" */
3254:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3255:     bufsiz = A->cmap->N * maxRows;
3256:     PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]));
3257:     break;
3258:   default: /* alg: "allgatherv" */
3259:     PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
3260:     PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls));
3261:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3262:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3263:     break;
3264:   }

3266:   C->product->data                = abt;
3267:   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3268:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3269:   return 0;
3270: }

/*
   Numeric phase of C = A * B^T ("cyclic" algorithm) for MPI dense matrices.

   Each rank packs its local row block of B and sends it around the ring of
   processes; over `size` rounds, each rank multiplies its local block of A
   against the block it currently holds (BLAS gemm with op(B)="T") and writes
   the result into the matching column block of C. Communication for round
   i+1 is overlapped with the gemm of round i.

   NOTE(review): assumes the symbolic phase sized abt->buf[0]/buf[1] to hold
   the widest row block of B (A->cmap->N * maxRows) — confirm against
   MatMatTransposeMultSymbolic_MPIDense_MPIDense.
*/
3272: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3273: {
3274:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3275:   Mat_MatTransMultDense *abt;
3276:   MPI_Comm               comm;
3277:   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3278:   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3279:   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3280:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3281:   const PetscScalar     *av, *bv;
3282:   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3283:   MPI_Request            reqs[2];
3284:   const PetscInt        *ranges;

3286:   MatCheckProduct(C, 3);
3288:   abt = (Mat_MatTransMultDense *)C->product->data;
3289:   PetscObjectGetComm((PetscObject)C, &comm);
3290:   MPI_Comm_rank(comm, &rank);
3291:   MPI_Comm_size(comm, &size);
3292:   MatDenseGetArrayRead(a->A, &av);
3293:   MatDenseGetArrayRead(b->A, &bv);
3294:   MatDenseGetArrayWrite(c->A, &cv);
3295:   MatDenseGetLDA(a->A, &i);
3296:   PetscBLASIntCast(i, &alda);
3297:   MatDenseGetLDA(b->A, &i);
3298:   PetscBLASIntCast(i, &blda);
3299:   MatDenseGetLDA(c->A, &i);
3300:   PetscBLASIntCast(i, &clda);
3301:   MatGetOwnershipRanges(B, &ranges);
3302:   bn = B->rmap->n;
      /* If B's local array is contiguous (lda == local rows) send it in place,
         otherwise pack it densely (column-major, lda = bn) into buf[0] */
3303:   if (blda == bn) {
3304:     sendbuf = (PetscScalar *)bv;
3305:   } else {
3306:     sendbuf = abt->buf[0];
3307:     for (k = 0, i = 0; i < cK; i++) {
3308:       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3309:     }
3310:   }
      /* ring neighbors: send to the previous rank, receive from the next */
3311:   if (size > 1) {
3312:     sendto   = (rank + size - 1) % size;
3313:     recvfrom = (rank + size + 1) % size;
3314:   } else {
3315:     sendto = recvfrom = 0;
3316:   }
3317:   PetscBLASIntCast(cK, &ck);
3318:   PetscBLASIntCast(c->A->rmap->n, &cm);
3319:   recvisfrom = rank; /* the block currently held originated on this rank */
3320:   for (i = 0; i < size; i++) {
3321:     /* the previous round's send and receive have completed, so both buffers may be read/modified again */
3322:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3323:     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

3325:     if (nextrecvisfrom != rank) {
3326:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3327:       sendsiz = cK * bn;
3328:       recvsiz = cK * nextbn;
3329:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so send and receive never overlap */
3330:       MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
3331:       MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
3332:     }

3334:     /* local aseq * sendbuf^T */
3335:     PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
3336:     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

3338:     if (nextrecvisfrom != rank) {
3339:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3340:       MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
3341:     }
3342:     bn         = nextbn;
3343:     recvisfrom = nextrecvisfrom;
3344:     sendbuf    = recvbuf;
3345:   }
3346:   MatDenseRestoreArrayRead(a->A, &av);
3347:   MatDenseRestoreArrayRead(b->A, &bv);
3348:   MatDenseRestoreArrayWrite(c->A, &cv);
3349:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
3350:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
3351:   return 0;
3352: }

3354: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3355: {
3356:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3357:   Mat_MatTransMultDense *abt;
3358:   MPI_Comm               comm;
3359:   PetscMPIInt            size;
3360:   PetscScalar           *cv, *sendbuf, *recvbuf;
3361:   const PetscScalar     *av, *bv;
3362:   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3363:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3364:   PetscBLASInt           cm, cn, ck, alda, clda;

3366:   MatCheckProduct(C, 3);
3368:   abt = (Mat_MatTransMultDense *)C->product->data;
3369:   PetscObjectGetComm((PetscObject)A, &comm);
3370:   MPI_Comm_size(comm, &size);
3371:   MatDenseGetArrayRead(a->A, &av);
3372:   MatDenseGetArrayRead(b->A, &bv);
3373:   MatDenseGetArrayWrite(c->A, &cv);
3374:   MatDenseGetLDA(a->A, &i);
3375:   PetscBLASIntCast(i, &alda);
3376:   MatDenseGetLDA(b->A, &blda);
3377:   MatDenseGetLDA(c->A, &i);
3378:   PetscBLASIntCast(i, &clda);
3379:   /* copy transpose of B into buf[0] */
3380:   bn      = B->rmap->n;
3381:   sendbuf = abt->buf[0];
3382:   recvbuf = abt->buf[1];
3383:   for (k = 0, j = 0; j < bn; j++) {
3384:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3385:   }
3386:   MatDenseRestoreArrayRead(b->A, &bv);
3387:   MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
3388:   PetscBLASIntCast(cK, &ck);
3389:   PetscBLASIntCast(c->A->rmap->n, &cm);
3390:   PetscBLASIntCast(c->A->cmap->n, &cn);
3391:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3392:   MatDenseRestoreArrayRead(a->A, &av);
3393:   MatDenseRestoreArrayRead(b->A, &bv);
3394:   MatDenseRestoreArrayWrite(c->A, &cv);
3395:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
3396:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
3397:   return 0;
3398: }

3400: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3401: {
3402:   Mat_MatTransMultDense *abt;

3404:   MatCheckProduct(C, 3);
3406:   abt = (Mat_MatTransMultDense *)C->product->data;
3407:   switch (abt->alg) {
3408:   case 1:
3409:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
3410:     break;
3411:   default:
3412:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
3413:     break;
3414:   }
3415:   return 0;
3416: }

3418: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3419: {
3420:   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

3422:   MatDestroy(&ab->Ce);
3423:   MatDestroy(&ab->Ae);
3424:   MatDestroy(&ab->Be);
3425:   PetscFree(ab);
3426:   return 0;
3427: }

3429: #if defined(PETSC_HAVE_ELEMENTAL)
/*
   Numeric phase of C = A * B for MPI dense matrices via Elemental:
   refreshes the cached Elemental copies of A and B, multiplies them with
   Elemental, and converts the result back into the MPI dense matrix C.
   The Ae/Be/Ce work matrices were created by the symbolic phase and live in
   C->product->data.
*/
3430: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3431: {
3432:   Mat_MatMultDense *ab;

3434:   MatCheckProduct(C, 3);
3436:   ab = (Mat_MatMultDense *)C->product->data;
      /* reuse the Elemental matrices allocated at symbolic time; only values change */
3437:   MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae);
3438:   MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be);
3439:   MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce);
      /* copy the Elemental result back into C's native MPI dense storage */
3440:   MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C);
3441:   return 0;
3442: }

3444: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3445: {
3446:   Mat               Ae, Be, Ce;
3447:   Mat_MatMultDense *ab;

3449:   MatCheckProduct(C, 4);
3451:   /* check local size of A and B */
3452:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3453:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3454:   }

3456:   /* create elemental matrices Ae and Be */
3457:   MatCreate(PetscObjectComm((PetscObject)A), &Ae);
3458:   MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
3459:   MatSetType(Ae, MATELEMENTAL);
3460:   MatSetUp(Ae);
3461:   MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE);

3463:   MatCreate(PetscObjectComm((PetscObject)B), &Be);
3464:   MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N);
3465:   MatSetType(Be, MATELEMENTAL);
3466:   MatSetUp(Be);
3467:   MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE);

3469:   /* compute symbolic Ce = Ae*Be */
3470:   MatCreate(PetscObjectComm((PetscObject)C), &Ce);
3471:   MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce);

3473:   /* setup C */
3474:   MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
3475:   MatSetType(C, MATDENSE);
3476:   MatSetUp(C);

3478:   /* create data structure for reuse Cdense */
3479:   PetscNew(&ab);
3480:   ab->Ae = Ae;
3481:   ab->Be = Be;
3482:   ab->Ce = Ce;

3484:   C->product->data       = ab;
3485:   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3486:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3487:   return 0;
3488: }
3489: #endif
3490: /* ----------------------------------------------- */
3491: #if defined(PETSC_HAVE_ELEMENTAL)
3492: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3493: {
3494:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3495:   C->ops->productsymbolic = MatProductSymbolic_AB;
3496:   return 0;
3497: }
3498: #endif

3500: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3501: {
3502:   Mat_Product *product = C->product;
3503:   Mat          A = product->A, B = product->B;

3505:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3506:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3507:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3508:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3509:   return 0;
3510: }

3512: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3513: {
3514:   Mat_Product *product     = C->product;
3515:   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3516:   PetscInt     alg, nalg = 2;
3517:   PetscBool    flg = PETSC_FALSE;

3519:   /* Set default algorithm */
3520:   alg = 0; /* default is allgatherv */
3521:   PetscStrcmp(product->alg, "default", &flg);
3522:   if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);

3524:   /* Get runtime option */
3525:   if (product->api_user) {
3526:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3527:     PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg);
3528:     PetscOptionsEnd();
3529:   } else {
3530:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3531:     PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg);
3532:     PetscOptionsEnd();
3533:   }
3534:   if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);

3536:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3537:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3538:   return 0;
3539: }

3541: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3542: {
3543:   Mat_Product *product = C->product;

3545:   switch (product->type) {
3546: #if defined(PETSC_HAVE_ELEMENTAL)
3547:   case MATPRODUCT_AB:
3548:     MatProductSetFromOptions_MPIDense_AB(C);
3549:     break;
3550: #endif
3551:   case MATPRODUCT_AtB:
3552:     MatProductSetFromOptions_MPIDense_AtB(C);
3553:     break;
3554:   case MATPRODUCT_ABt:
3555:     MatProductSetFromOptions_MPIDense_ABt(C);
3556:     break;
3557:   default:
3558:     break;
3559:   }
3560:   return 0;
3561: }