Actual source code: mpiaijhipsparse.hip.cxx

  1: /* Portions of this code are under:
  2:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  3: */
  4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  5: #include <../src/mat/impls/aij/seq/seqhipsparse/hipsparsematimpl.h>
  6: #include <../src/mat/impls/aij/mpi/mpihipsparse/mpihipsparsematimpl.h>
  7: #include <../src/mat/impls/aij/mpi/cupm/mpiaijcupm.hpp>
  8: #include <thrust/advance.h>
  9: #include <thrust/partition.h>
 10: #include <thrust/sort.h>
 11: #include <thrust/unique.h>

 13: struct MatMPIAIJHIPSPARSE_Policy {
 14:   typedef Mat_MPIAIJHIPSPARSE mat_struct_type;

 16:   static const char *mpi_mat_type;
 17:   static const char *seq_mat_type;
 18:   static const char *vec_seq_type;
 19:   static const char *set_format_c;
 20:   static const char *mpi_convert_hypre_c;

 22:   static PetscErrorCode CopyToGPU(Mat A) { return MatSeqAIJHIPSPARSECopyToGPU(A); }
 23:   static PetscErrorCode MergeMats(Mat A, Mat B, MatReuse r, Mat *C) { return MatSeqAIJHIPSPARSEMergeMats(A, B, r, C); }

 25:   static PetscErrorCode GetArray(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSEGetArray(A, a); }
 26:   static PetscErrorCode GetArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSEGetArrayWrite(A, a); }
 27:   static PetscErrorCode RestoreArray(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSERestoreArray(A, a); }
 28:   static PetscErrorCode RestoreArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSERestoreArrayWrite(A, a); }

 30:   static PetscErrorCode SetSubMatFormats(Mat A, Mat B, Mat_MPIAIJHIPSPARSE *spptr)
 31:   {
 32:     PetscFunctionBegin;
 33:     PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT, spptr->diagGPUMatFormat));
 34:     PetscCall(MatHIPSPARSESetFormat(B, MAT_HIPSPARSE_MULT, spptr->offdiagGPUMatFormat));
 35:     PetscFunctionReturn(PETSC_SUCCESS);
 36:   }
 37: };
 38: const char *MatMPIAIJHIPSPARSE_Policy::mpi_mat_type        = MATMPIAIJHIPSPARSE;
 39: const char *MatMPIAIJHIPSPARSE_Policy::seq_mat_type        = MATSEQAIJHIPSPARSE;
 40: const char *MatMPIAIJHIPSPARSE_Policy::vec_seq_type        = VECSEQHIP;
 41: const char *MatMPIAIJHIPSPARSE_Policy::set_format_c        = "MatHIPSPARSESetFormat_C";
 42: const char *MatMPIAIJHIPSPARSE_Policy::mpi_convert_hypre_c = "MatConvert_mpiaijhipsparse_hypre_C";

 44: using MatMPIAIJHIPSPARSE_CUPM_t = Petsc::mat::aij::cupm::impl::MatMPIAIJCUSPARSE_CUPM<Petsc::device::cupm::DeviceType::HIP, MatMPIAIJHIPSPARSE_Policy>;

 46: static PetscErrorCode MatSetPreallocationCOO_MPIAIJHIPSPARSE(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
 47: {
 48:   return MatMPIAIJHIPSPARSE_CUPM_t::SetPreallocationCOO(mat, coo_n, coo_i, coo_j);
 49: }

 51: static PetscErrorCode MatSetValuesCOO_MPIAIJHIPSPARSE(Mat mat, const PetscScalar v[], InsertMode imode)
 52: {
 53:   return MatMPIAIJHIPSPARSE_CUPM_t::SetValuesCOO(mat, v, imode);
 54: }

 56: static PetscErrorCode MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
 57: {
 58:   return MatMPIAIJHIPSPARSE_CUPM_t::GetLocalMatMerge(A, scall, glob, A_loc);
 59: }

 61: static PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
 62: {
 63:   return MatMPIAIJHIPSPARSE_CUPM_t::SetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz);
 64: }

 66: static PetscErrorCode MatMult_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
 67: {
 68:   return MatMPIAIJHIPSPARSE_CUPM_t::Mult(A, xx, yy);
 69: }

 71: static PetscErrorCode MatZeroEntries_MPIAIJHIPSPARSE(Mat A)
 72: {
 73:   return MatMPIAIJHIPSPARSE_CUPM_t::ZeroEntries(A);
 74: }

 76: static PetscErrorCode MatMultAdd_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy, Vec zz)
 77: {
 78:   return MatMPIAIJHIPSPARSE_CUPM_t::MultAdd(A, xx, yy, zz);
 79: }

 81: static PetscErrorCode MatMultTranspose_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
 82: {
 83:   return MatMPIAIJHIPSPARSE_CUPM_t::MultTranspose(A, xx, yy);
 84: }

 86: static PetscErrorCode MatAssemblyEnd_MPIAIJHIPSPARSE(Mat A, MatAssemblyType mode)
 87: {
 88:   return MatMPIAIJHIPSPARSE_CUPM_t::AssemblyEnd(A, mode);
 89: }

 91: static PetscErrorCode MatHIPSPARSESetFormat_MPIAIJHIPSPARSE(Mat A, MatHIPSPARSEFormatOperation op, MatHIPSPARSEStorageFormat format)
 92: {
 93:   Mat_MPIAIJ          *a               = (Mat_MPIAIJ *)A->data;
 94:   Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;

 96:   PetscFunctionBegin;
 97:   switch (op) {
 98:   case MAT_HIPSPARSE_MULT_DIAG:
 99:     hipsparseStruct->diagGPUMatFormat = format;
100:     break;
101:   case MAT_HIPSPARSE_MULT_OFFDIAG:
102:     hipsparseStruct->offdiagGPUMatFormat = format;
103:     break;
104:   case MAT_HIPSPARSE_ALL:
105:     hipsparseStruct->diagGPUMatFormat    = format;
106:     hipsparseStruct->offdiagGPUMatFormat = format;
107:     break;
108:   default:
109:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unsupported operation %d for MatHIPSPARSEFormatOperation. Only MAT_HIPSPARSE_MULT_DIAG, MAT_HIPSPARSE_MULT_DIAG, and MAT_HIPSPARSE_MULT_ALL are currently supported.", op);
110:   }
111:   PetscFunctionReturn(PETSC_SUCCESS);
112: }

114: static PetscErrorCode MatSetFromOptions_MPIAIJHIPSPARSE(Mat A, PetscOptionItems PetscOptionsObject)
115: {
116:   MatHIPSPARSEStorageFormat format;
117:   PetscBool                 flg;
118:   Mat_MPIAIJ               *a               = (Mat_MPIAIJ *)A->data;
119:   Mat_MPIAIJHIPSPARSE      *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;

121:   PetscFunctionBegin;
122:   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJHIPSPARSE options");
123:   if (A->factortype == MAT_FACTOR_NONE) {
124:     PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_diag_storage_format", "sets storage format of the diagonal blocks of (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
125:     if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_DIAG, format));
126:     PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_offdiag_storage_format", "sets storage format of the off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->offdiagGPUMatFormat, (PetscEnum *)&format, &flg));
127:     if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_OFFDIAG, format));
128:     PetscCall(PetscOptionsEnum("-mat_hipsparse_storage_format", "sets storage format of the diagonal and off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
129:     if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_ALL, format));
130:   }
131:   PetscOptionsHeadEnd();
132:   PetscFunctionReturn(PETSC_SUCCESS);
133: }

135: static PetscErrorCode MatDestroy_MPIAIJHIPSPARSE(Mat A)
136: {
137:   return MatMPIAIJHIPSPARSE_CUPM_t::Destroy(A);
138: }

140: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat B, MatType mtype, MatReuse reuse, Mat *newmat)
141: {
142:   Mat_MPIAIJ *a;
143:   Mat         A;

145:   PetscFunctionBegin;
146:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
147:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
148:   else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
149:   A             = *newmat;
150:   A->boundtocpu = PETSC_FALSE;
151:   PetscCall(PetscFree(A->defaultvectype));
152:   PetscCall(PetscStrallocpy(VECHIP, &A->defaultvectype));

154:   a = (Mat_MPIAIJ *)A->data;
155:   if (a->A) PetscCall(MatSetType(a->A, MATSEQAIJHIPSPARSE));
156:   if (a->B) PetscCall(MatSetType(a->B, MATSEQAIJHIPSPARSE));
157:   if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQHIP));

159:   if (reuse != MAT_REUSE_MATRIX && !a->spptr) PetscCallCXX(a->spptr = new Mat_MPIAIJHIPSPARSE);

161:   A->ops->assemblyend           = MatAssemblyEnd_MPIAIJHIPSPARSE;
162:   A->ops->mult                  = MatMult_MPIAIJHIPSPARSE;
163:   A->ops->multadd               = MatMultAdd_MPIAIJHIPSPARSE;
164:   A->ops->multtranspose         = MatMultTranspose_MPIAIJHIPSPARSE;
165:   A->ops->setfromoptions        = MatSetFromOptions_MPIAIJHIPSPARSE;
166:   A->ops->destroy               = MatDestroy_MPIAIJHIPSPARSE;
167:   A->ops->zeroentries           = MatZeroEntries_MPIAIJHIPSPARSE;
168:   A->ops->productsetfromoptions = MatProductSetFromOptions_MPIAIJBACKEND;
169:   A->ops->getcurrentmemtype     = MatGetCurrentMemType_MPIAIJ;

171:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJHIPSPARSE));
172:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE));
173:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE));
174:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHIPSPARSESetFormat_C", MatHIPSPARSESetFormat_MPIAIJHIPSPARSE));
175:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJHIPSPARSE));
176:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJHIPSPARSE));
177: #if defined(PETSC_HAVE_HYPRE)
178:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpiaijhipsparse_hypre_C", MatConvert_AIJ_HYPRE));
179: #endif
180:   PetscFunctionReturn(PETSC_SUCCESS);
181: }

183: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJHIPSPARSE(Mat A)
184: {
185:   PetscFunctionBegin;
186:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
187:   PetscCall(MatCreate_MPIAIJ(A));
188:   PetscCall(MatConvert_MPIAIJ_MPIAIJHIPSPARSE(A, MATMPIAIJHIPSPARSE, MAT_INPLACE_MATRIX, &A));
189:   PetscFunctionReturn(PETSC_SUCCESS);
190: }

192: /*@
193:   MatCreateAIJHIPSPARSE - Creates a sparse matrix in AIJ (compressed row) format
194:   (the default parallel PETSc format).

196:   Collective

198:   Input Parameters:
199: + comm  - MPI communicator, set to `PETSC_COMM_SELF`
200: . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
201:            This value should be the same as the local size used in creating the
202:            y vector for the matrix-vector product y = Ax.
203: . n     - This value should be the same as the local size used in creating the
204:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
205:        calculated if `N` is given) For square matrices `n` is almost always `m`.
206: . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
207: . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
208: . d_nz  - number of nonzeros per row (same for all rows), for the "diagonal" portion of the matrix
209: . d_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "diagonal" portion of the matrix
210: . o_nz  - number of nonzeros per row (same for all rows), for the "off-diagonal" portion of the matrix
211: - o_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "off-diagonal" portion of the matrix

213:   Output Parameter:
214: . A - the matrix

216:   Level: intermediate

218:   Notes:
219:   This matrix will ultimately pushed down to AMD GPUs and use the HIPSPARSE library for
220:   calculations. For good matrix assembly performance the user should preallocate the matrix
221:   storage by setting the parameter `nz` (or the array `nnz`).

223:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
224:   MatXXXXSetPreallocation() paradigm instead of this routine directly.
225:   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

227:   If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored

229:   The `MATAIJ` format (compressed row storage), is fully compatible with standard Fortran
230:   storage.  That is, the stored row and column indices can begin at
231:   either one (as in Fortran) or zero.

233:   Specify the preallocated storage with either `d_nz` (`o_nz`) or `d_nnz` (`o_nnz`) (not both).
234:   Set `d_nz` (`o_nz`) = `PETSC_DEFAULT` and `d_nnz` (`o_nnz`) = `NULL` for PETSc to control dynamic memory
235:   allocation.

237: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateAIJ()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MATMPIAIJHIPSPARSE`, `MATAIJHIPSPARSE`
238: @*/
239: PetscErrorCode MatCreateAIJHIPSPARSE(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
240: {
241:   return MatMPIAIJHIPSPARSE_CUPM_t::CreateAIJ(comm, m, n, M, N, d_nz, d_nnz, o_nz, o_nnz, A);
242: }

244: /*MC
245:    MATAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is as same as `MATMPIAIJHIPSPARSE`.

247:    A matrix type whose data resides on GPUs. These matrices can be in either
248:    CSR, ELL, or Hybrid format. All matrix calculations are performed on AMD GPUs using the HIPSPARSE library.

250:    This matrix type is identical to `MATSEQAIJHIPSPARSE` when constructed with a single process communicator,
251:    and `MATMPIAIJHIPSPARSE` otherwise.  As a result, for single process communicators,
252:    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
253:    for communicators controlling multiple processes.  It is recommended that you call both of
254:    the above preallocation routines for simplicity.

256:    Options Database Keys:
257: +  -mat_type mpiaijhipsparse - sets the matrix type to `MATMPIAIJHIPSPARSE`
258: .  -mat_hipsparse_storage_format csr - sets the storage format of diagonal and off-diagonal matrices. Other options include ell (ellpack) or hyb (hybrid).
259: .  -mat_hipsparse_mult_diag_storage_format csr - sets the storage format of diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
260: -  -mat_hipsparse_mult_offdiag_storage_format csr - sets the storage format of off-diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).

262:   Level: beginner

264: .seealso: [](ch_matrices), `Mat`, `MatCreateAIJHIPSPARSE()`, `MATSEQAIJHIPSPARSE`, `MATMPIAIJHIPSPARSE`, `MatCreateSeqAIJHIPSPARSE()`, `MatHIPSPARSESetFormat()`, `MatHIPSPARSEStorageFormat`, `MatHIPSPARSEFormatOperation`
265: M*/

267: /*MC
268:    MATMPIAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is as same as `MATAIJHIPSPARSE`.

270:   Level: beginner

272: .seealso: [](ch_matrices), `Mat`, `MATAIJHIPSPARSE`, `MATSEQAIJHIPSPARSE`
273: M*/