Actual source code: mpiaijhipsparse.hip.cxx
1: /* Portions of this code are under:
2: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
3: */
4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
5: #include <../src/mat/impls/aij/seq/seqhipsparse/hipsparsematimpl.h>
6: #include <../src/mat/impls/aij/mpi/mpihipsparse/mpihipsparsematimpl.h>
7: #include <../src/mat/impls/aij/mpi/cupm/mpiaijcupm.hpp>
8: #include <thrust/advance.h>
9: #include <thrust/partition.h>
10: #include <thrust/sort.h>
11: #include <thrust/unique.h>
13: struct MatMPIAIJHIPSPARSE_Policy {
14: typedef Mat_MPIAIJHIPSPARSE mat_struct_type;
16: static const char *mpi_mat_type;
17: static const char *seq_mat_type;
18: static const char *vec_seq_type;
19: static const char *set_format_c;
20: static const char *mpi_convert_hypre_c;
22: static PetscErrorCode CopyToGPU(Mat A) { return MatSeqAIJHIPSPARSECopyToGPU(A); }
23: static PetscErrorCode MergeMats(Mat A, Mat B, MatReuse r, Mat *C) { return MatSeqAIJHIPSPARSEMergeMats(A, B, r, C); }
25: static PetscErrorCode GetArray(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSEGetArray(A, a); }
26: static PetscErrorCode GetArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSEGetArrayWrite(A, a); }
27: static PetscErrorCode RestoreArray(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSERestoreArray(A, a); }
28: static PetscErrorCode RestoreArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJHIPSPARSERestoreArrayWrite(A, a); }
30: static PetscErrorCode SetSubMatFormats(Mat A, Mat B, Mat_MPIAIJHIPSPARSE *spptr)
31: {
32: PetscFunctionBegin;
33: PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT, spptr->diagGPUMatFormat));
34: PetscCall(MatHIPSPARSESetFormat(B, MAT_HIPSPARSE_MULT, spptr->offdiagGPUMatFormat));
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
37: };
38: const char *MatMPIAIJHIPSPARSE_Policy::mpi_mat_type = MATMPIAIJHIPSPARSE;
39: const char *MatMPIAIJHIPSPARSE_Policy::seq_mat_type = MATSEQAIJHIPSPARSE;
40: const char *MatMPIAIJHIPSPARSE_Policy::vec_seq_type = VECSEQHIP;
41: const char *MatMPIAIJHIPSPARSE_Policy::set_format_c = "MatHIPSPARSESetFormat_C";
42: const char *MatMPIAIJHIPSPARSE_Policy::mpi_convert_hypre_c = "MatConvert_mpiaijhipsparse_hypre_C";
44: using MatMPIAIJHIPSPARSE_CUPM_t = Petsc::mat::aij::cupm::impl::MatMPIAIJCUSPARSE_CUPM<Petsc::device::cupm::DeviceType::HIP, MatMPIAIJHIPSPARSE_Policy>;
46: static PetscErrorCode MatSetPreallocationCOO_MPIAIJHIPSPARSE(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
47: {
48: return MatMPIAIJHIPSPARSE_CUPM_t::SetPreallocationCOO(mat, coo_n, coo_i, coo_j);
49: }
51: static PetscErrorCode MatSetValuesCOO_MPIAIJHIPSPARSE(Mat mat, const PetscScalar v[], InsertMode imode)
52: {
53: return MatMPIAIJHIPSPARSE_CUPM_t::SetValuesCOO(mat, v, imode);
54: }
56: static PetscErrorCode MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
57: {
58: return MatMPIAIJHIPSPARSE_CUPM_t::GetLocalMatMerge(A, scall, glob, A_loc);
59: }
61: static PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
62: {
63: return MatMPIAIJHIPSPARSE_CUPM_t::SetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz);
64: }
66: static PetscErrorCode MatMult_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
67: {
68: return MatMPIAIJHIPSPARSE_CUPM_t::Mult(A, xx, yy);
69: }
71: static PetscErrorCode MatZeroEntries_MPIAIJHIPSPARSE(Mat A)
72: {
73: return MatMPIAIJHIPSPARSE_CUPM_t::ZeroEntries(A);
74: }
76: static PetscErrorCode MatMultAdd_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy, Vec zz)
77: {
78: return MatMPIAIJHIPSPARSE_CUPM_t::MultAdd(A, xx, yy, zz);
79: }
81: static PetscErrorCode MatMultTranspose_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
82: {
83: return MatMPIAIJHIPSPARSE_CUPM_t::MultTranspose(A, xx, yy);
84: }
86: static PetscErrorCode MatAssemblyEnd_MPIAIJHIPSPARSE(Mat A, MatAssemblyType mode)
87: {
88: return MatMPIAIJHIPSPARSE_CUPM_t::AssemblyEnd(A, mode);
89: }
91: static PetscErrorCode MatHIPSPARSESetFormat_MPIAIJHIPSPARSE(Mat A, MatHIPSPARSEFormatOperation op, MatHIPSPARSEStorageFormat format)
92: {
93: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
94: Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;
96: PetscFunctionBegin;
97: switch (op) {
98: case MAT_HIPSPARSE_MULT_DIAG:
99: hipsparseStruct->diagGPUMatFormat = format;
100: break;
101: case MAT_HIPSPARSE_MULT_OFFDIAG:
102: hipsparseStruct->offdiagGPUMatFormat = format;
103: break;
104: case MAT_HIPSPARSE_ALL:
105: hipsparseStruct->diagGPUMatFormat = format;
106: hipsparseStruct->offdiagGPUMatFormat = format;
107: break;
108: default:
109: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unsupported operation %d for MatHIPSPARSEFormatOperation. Only MAT_HIPSPARSE_MULT_DIAG, MAT_HIPSPARSE_MULT_DIAG, and MAT_HIPSPARSE_MULT_ALL are currently supported.", op);
110: }
111: PetscFunctionReturn(PETSC_SUCCESS);
112: }
114: static PetscErrorCode MatSetFromOptions_MPIAIJHIPSPARSE(Mat A, PetscOptionItems PetscOptionsObject)
115: {
116: MatHIPSPARSEStorageFormat format;
117: PetscBool flg;
118: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
119: Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;
121: PetscFunctionBegin;
122: PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJHIPSPARSE options");
123: if (A->factortype == MAT_FACTOR_NONE) {
124: PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_diag_storage_format", "sets storage format of the diagonal blocks of (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
125: if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_DIAG, format));
126: PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_offdiag_storage_format", "sets storage format of the off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->offdiagGPUMatFormat, (PetscEnum *)&format, &flg));
127: if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_OFFDIAG, format));
128: PetscCall(PetscOptionsEnum("-mat_hipsparse_storage_format", "sets storage format of the diagonal and off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
129: if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_ALL, format));
130: }
131: PetscOptionsHeadEnd();
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
135: static PetscErrorCode MatDestroy_MPIAIJHIPSPARSE(Mat A)
136: {
137: return MatMPIAIJHIPSPARSE_CUPM_t::Destroy(A);
138: }
140: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat B, MatType mtype, MatReuse reuse, Mat *newmat)
141: {
142: Mat_MPIAIJ *a;
143: Mat A;
145: PetscFunctionBegin;
146: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
147: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
148: else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
149: A = *newmat;
150: A->boundtocpu = PETSC_FALSE;
151: PetscCall(PetscFree(A->defaultvectype));
152: PetscCall(PetscStrallocpy(VECHIP, &A->defaultvectype));
154: a = (Mat_MPIAIJ *)A->data;
155: if (a->A) PetscCall(MatSetType(a->A, MATSEQAIJHIPSPARSE));
156: if (a->B) PetscCall(MatSetType(a->B, MATSEQAIJHIPSPARSE));
157: if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQHIP));
159: if (reuse != MAT_REUSE_MATRIX && !a->spptr) PetscCallCXX(a->spptr = new Mat_MPIAIJHIPSPARSE);
161: A->ops->assemblyend = MatAssemblyEnd_MPIAIJHIPSPARSE;
162: A->ops->mult = MatMult_MPIAIJHIPSPARSE;
163: A->ops->multadd = MatMultAdd_MPIAIJHIPSPARSE;
164: A->ops->multtranspose = MatMultTranspose_MPIAIJHIPSPARSE;
165: A->ops->setfromoptions = MatSetFromOptions_MPIAIJHIPSPARSE;
166: A->ops->destroy = MatDestroy_MPIAIJHIPSPARSE;
167: A->ops->zeroentries = MatZeroEntries_MPIAIJHIPSPARSE;
168: A->ops->productsetfromoptions = MatProductSetFromOptions_MPIAIJBACKEND;
169: A->ops->getcurrentmemtype = MatGetCurrentMemType_MPIAIJ;
171: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJHIPSPARSE));
172: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE));
173: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE));
174: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHIPSPARSESetFormat_C", MatHIPSPARSESetFormat_MPIAIJHIPSPARSE));
175: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJHIPSPARSE));
176: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJHIPSPARSE));
177: #if defined(PETSC_HAVE_HYPRE)
178: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpiaijhipsparse_hypre_C", MatConvert_AIJ_HYPRE));
179: #endif
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJHIPSPARSE(Mat A)
184: {
185: PetscFunctionBegin;
186: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
187: PetscCall(MatCreate_MPIAIJ(A));
188: PetscCall(MatConvert_MPIAIJ_MPIAIJHIPSPARSE(A, MATMPIAIJHIPSPARSE, MAT_INPLACE_MATRIX, &A));
189: PetscFunctionReturn(PETSC_SUCCESS);
190: }
192: /*@
193: MatCreateAIJHIPSPARSE - Creates a sparse matrix in AIJ (compressed row) format
194: (the default parallel PETSc format).
196: Collective
198: Input Parameters:
199: + comm - MPI communicator, set to `PETSC_COMM_SELF`
200: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
201: This value should be the same as the local size used in creating the
202: y vector for the matrix-vector product y = Ax.
203: . n - This value should be the same as the local size used in creating the
204: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
205: calculated if `N` is given) For square matrices `n` is almost always `m`.
206: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
207: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
208: . d_nz - number of nonzeros per row (same for all rows), for the "diagonal" portion of the matrix
209: . d_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "diagonal" portion of the matrix
210: . o_nz - number of nonzeros per row (same for all rows), for the "off-diagonal" portion of the matrix
211: - o_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "off-diagonal" portion of the matrix
213: Output Parameter:
214: . A - the matrix
216: Level: intermediate
218: Notes:
219: This matrix will ultimately pushed down to AMD GPUs and use the HIPSPARSE library for
220: calculations. For good matrix assembly performance the user should preallocate the matrix
221: storage by setting the parameter `nz` (or the array `nnz`).
223: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
224: MatXXXXSetPreallocation() paradigm instead of this routine directly.
225: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
227: If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored
229: The `MATAIJ` format (compressed row storage), is fully compatible with standard Fortran
230: storage. That is, the stored row and column indices can begin at
231: either one (as in Fortran) or zero.
233: Specify the preallocated storage with either `d_nz` (`o_nz`) or `d_nnz` (`o_nnz`) (not both).
234: Set `d_nz` (`o_nz`) = `PETSC_DEFAULT` and `d_nnz` (`o_nnz`) = `NULL` for PETSc to control dynamic memory
235: allocation.
237: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateAIJ()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MATMPIAIJHIPSPARSE`, `MATAIJHIPSPARSE`
238: @*/
239: PetscErrorCode MatCreateAIJHIPSPARSE(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
240: {
241: return MatMPIAIJHIPSPARSE_CUPM_t::CreateAIJ(comm, m, n, M, N, d_nz, d_nnz, o_nz, o_nnz, A);
242: }
244: /*MC
245: MATAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is as same as `MATMPIAIJHIPSPARSE`.
247: A matrix type whose data resides on GPUs. These matrices can be in either
248: CSR, ELL, or Hybrid format. All matrix calculations are performed on AMD GPUs using the HIPSPARSE library.
250: This matrix type is identical to `MATSEQAIJHIPSPARSE` when constructed with a single process communicator,
251: and `MATMPIAIJHIPSPARSE` otherwise. As a result, for single process communicators,
252: `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
253: for communicators controlling multiple processes. It is recommended that you call both of
254: the above preallocation routines for simplicity.
256: Options Database Keys:
257: + -mat_type mpiaijhipsparse - sets the matrix type to `MATMPIAIJHIPSPARSE`
258: . -mat_hipsparse_storage_format csr - sets the storage format of diagonal and off-diagonal matrices. Other options include ell (ellpack) or hyb (hybrid).
259: . -mat_hipsparse_mult_diag_storage_format csr - sets the storage format of diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
260: - -mat_hipsparse_mult_offdiag_storage_format csr - sets the storage format of off-diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
262: Level: beginner
264: .seealso: [](ch_matrices), `Mat`, `MatCreateAIJHIPSPARSE()`, `MATSEQAIJHIPSPARSE`, `MATMPIAIJHIPSPARSE`, `MatCreateSeqAIJHIPSPARSE()`, `MatHIPSPARSESetFormat()`, `MatHIPSPARSEStorageFormat`, `MatHIPSPARSEFormatOperation`
265: M*/
267: /*MC
268: MATMPIAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is as same as `MATAIJHIPSPARSE`.
270: Level: beginner
272: .seealso: [](ch_matrices), `Mat`, `MATAIJHIPSPARSE`, `MATSEQAIJHIPSPARSE`
273: M*/