Actual source code: mpiaijcusparse.cu
1: #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
3: #include <petscconf.h>
4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
5: #include <../src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h>
6: #include <../src/mat/impls/aij/mpi/mpicusparse/mpicusparsematimpl.h>
7: #include <../src/mat/impls/aij/mpi/cupm/mpiaijcupm.hpp>
8: #include <thrust/advance.h>
9: #include <thrust/partition.h>
10: #include <thrust/sort.h>
11: #include <thrust/unique.h>
13: struct MatMPIAIJCUSPARSE_Policy {
14: typedef Mat_MPIAIJCUSPARSE mat_struct_type;
16: static const char *mpi_mat_type;
17: static const char *seq_mat_type;
18: static const char *vec_seq_type;
19: static const char *set_format_c;
20: static const char *mpi_convert_hypre_c;
22: static PetscErrorCode CopyToGPU(Mat A) { return MatSeqAIJCUSPARSECopyToGPU(A); }
23: static PetscErrorCode MergeMats(Mat A, Mat B, MatReuse r, Mat *C) { return MatSeqAIJCUSPARSEMergeMats(A, B, r, C); }
25: static PetscErrorCode GetArray(Mat A, PetscScalar **a) { return MatSeqAIJCUSPARSEGetArray(A, a); }
26: static PetscErrorCode GetArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJCUSPARSEGetArrayWrite(A, a); }
27: static PetscErrorCode RestoreArray(Mat A, PetscScalar **a) { return MatSeqAIJCUSPARSERestoreArray(A, a); }
28: static PetscErrorCode RestoreArrayWrite(Mat A, PetscScalar **a) { return MatSeqAIJCUSPARSERestoreArrayWrite(A, a); }
30: static PetscErrorCode SetSubMatFormats(Mat A, Mat B, Mat_MPIAIJCUSPARSE *spptr)
31: {
32: PetscFunctionBegin;
33: PetscCall(MatCUSPARSESetFormat(A, MAT_CUSPARSE_MULT, spptr->diagGPUMatFormat));
34: PetscCall(MatCUSPARSESetFormat(B, MAT_CUSPARSE_MULT, spptr->offdiagGPUMatFormat));
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
37: };
38: const char *MatMPIAIJCUSPARSE_Policy::mpi_mat_type = MATMPIAIJCUSPARSE;
39: const char *MatMPIAIJCUSPARSE_Policy::seq_mat_type = MATSEQAIJCUSPARSE;
40: const char *MatMPIAIJCUSPARSE_Policy::vec_seq_type = VECSEQCUDA;
41: const char *MatMPIAIJCUSPARSE_Policy::set_format_c = "MatCUSPARSESetFormat_C";
42: const char *MatMPIAIJCUSPARSE_Policy::mpi_convert_hypre_c = "MatConvert_mpiaijcusparse_hypre_C";
44: using MatMPIAIJCUSPARSE_CUPM_t = Petsc::mat::aij::cupm::impl::MatMPIAIJCUSPARSE_CUPM<Petsc::device::cupm::DeviceType::CUDA, MatMPIAIJCUSPARSE_Policy>;
46: static PetscErrorCode MatSetPreallocationCOO_MPIAIJCUSPARSE(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
47: {
48: return MatMPIAIJCUSPARSE_CUPM_t::SetPreallocationCOO(mat, coo_n, coo_i, coo_j);
49: }
51: static PetscErrorCode MatSetValuesCOO_MPIAIJCUSPARSE(Mat mat, const PetscScalar v[], InsertMode imode)
52: {
53: return MatMPIAIJCUSPARSE_CUPM_t::SetValuesCOO(mat, v, imode);
54: }
56: static PetscErrorCode MatMPIAIJGetLocalMatMerge_MPIAIJCUSPARSE(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
57: {
58: return MatMPIAIJCUSPARSE_CUPM_t::GetLocalMatMerge(A, scall, glob, A_loc);
59: }
61: static PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
62: {
63: return MatMPIAIJCUSPARSE_CUPM_t::SetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz);
64: }
66: static PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A, Vec xx, Vec yy)
67: {
68: return MatMPIAIJCUSPARSE_CUPM_t::Mult(A, xx, yy);
69: }
71: static PetscErrorCode MatZeroEntries_MPIAIJCUSPARSE(Mat A)
72: {
73: return MatMPIAIJCUSPARSE_CUPM_t::ZeroEntries(A);
74: }
76: static PetscErrorCode MatMultAdd_MPIAIJCUSPARSE(Mat A, Vec xx, Vec yy, Vec zz)
77: {
78: return MatMPIAIJCUSPARSE_CUPM_t::MultAdd(A, xx, yy, zz);
79: }
81: static PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A, Vec xx, Vec yy)
82: {
83: return MatMPIAIJCUSPARSE_CUPM_t::MultTranspose(A, xx, yy);
84: }
86: static PetscErrorCode MatAssemblyEnd_MPIAIJCUSPARSE(Mat A, MatAssemblyType mode)
87: {
88: return MatMPIAIJCUSPARSE_CUPM_t::AssemblyEnd(A, mode);
89: }
91: static PetscErrorCode MatCUSPARSESetFormat_MPIAIJCUSPARSE(Mat A, MatCUSPARSEFormatOperation op, MatCUSPARSEStorageFormat format)
92: {
93: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
94: Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE *)a->spptr;
96: PetscFunctionBegin;
97: switch (op) {
98: case MAT_CUSPARSE_MULT_DIAG:
99: cusparseStruct->diagGPUMatFormat = format;
100: break;
101: case MAT_CUSPARSE_MULT_OFFDIAG:
102: cusparseStruct->offdiagGPUMatFormat = format;
103: break;
104: case MAT_CUSPARSE_ALL:
105: cusparseStruct->diagGPUMatFormat = format;
106: cusparseStruct->offdiagGPUMatFormat = format;
107: break;
108: default:
109: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unsupported operation %d for MatCUSPARSEFormatOperation. Only MAT_CUSPARSE_MULT_DIAG, MAT_CUSPARSE_MULT_DIAG, and MAT_CUSPARSE_MULT_ALL are currently supported.", op);
110: }
111: PetscFunctionReturn(PETSC_SUCCESS);
112: }
114: static PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A, PetscOptionItems PetscOptionsObject)
115: {
116: MatCUSPARSEStorageFormat format;
117: PetscBool flg;
118: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
119: Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE *)a->spptr;
121: PetscFunctionBegin;
122: PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJCUSPARSE options");
123: if (A->factortype == MAT_FACTOR_NONE) {
124: PetscCall(PetscOptionsEnum("-mat_cusparse_mult_diag_storage_format", "sets storage format of the diagonal blocks of (mpi)aijcusparse gpu matrices for SpMV", "MatCUSPARSESetFormat", MatCUSPARSEStorageFormats, (PetscEnum)cusparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
125: if (flg) PetscCall(MatCUSPARSESetFormat(A, MAT_CUSPARSE_MULT_DIAG, format));
126: PetscCall(PetscOptionsEnum("-mat_cusparse_mult_offdiag_storage_format", "sets storage format of the off-diagonal blocks (mpi)aijcusparse gpu matrices for SpMV", "MatCUSPARSESetFormat", MatCUSPARSEStorageFormats, (PetscEnum)cusparseStruct->offdiagGPUMatFormat, (PetscEnum *)&format, &flg));
127: if (flg) PetscCall(MatCUSPARSESetFormat(A, MAT_CUSPARSE_MULT_OFFDIAG, format));
128: PetscCall(PetscOptionsEnum("-mat_cusparse_storage_format", "sets storage format of the diagonal and off-diagonal blocks (mpi)aijcusparse gpu matrices for SpMV", "MatCUSPARSESetFormat", MatCUSPARSEStorageFormats, (PetscEnum)cusparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
129: if (flg) PetscCall(MatCUSPARSESetFormat(A, MAT_CUSPARSE_ALL, format));
130: }
131: PetscOptionsHeadEnd();
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
135: static PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
136: {
137: return MatMPIAIJCUSPARSE_CUPM_t::Destroy(A);
138: }
140: /* defines MatSetValues_MPICUSPARSE_Hash() */
141: #define TYPE AIJ
142: #define TYPE_AIJ
143: #define SUB_TYPE_CUSPARSE
144: #include "../src/mat/impls/aij/mpi/mpihashmat.h"
145: #undef TYPE
146: #undef TYPE_AIJ
147: #undef SUB_TYPE_CUSPARSE
149: static PetscErrorCode MatSetUp_MPI_HASH_CUSPARSE(Mat A)
150: {
151: Mat_MPIAIJ *b = (Mat_MPIAIJ *)A->data;
152: Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE *)b->spptr;
154: PetscFunctionBegin;
155: PetscCall(MatSetUp_MPI_Hash(A));
156: PetscCall(MatCUSPARSESetFormat(b->A, MAT_CUSPARSE_MULT, cusparseStruct->diagGPUMatFormat));
157: PetscCall(MatCUSPARSESetFormat(b->B, MAT_CUSPARSE_MULT, cusparseStruct->offdiagGPUMatFormat));
158: A->preallocated = PETSC_TRUE;
159: PetscFunctionReturn(PETSC_SUCCESS);
160: }
162: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat B, MatType, MatReuse reuse, Mat *newmat)
163: {
164: Mat_MPIAIJ *a;
165: Mat A;
167: PetscFunctionBegin;
168: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
169: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
170: else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
171: A = *newmat;
172: A->boundtocpu = PETSC_FALSE;
173: PetscCall(PetscFree(A->defaultvectype));
174: PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));
176: a = (Mat_MPIAIJ *)A->data;
177: if (a->A) PetscCall(MatSetType(a->A, MATSEQAIJCUSPARSE));
178: if (a->B) PetscCall(MatSetType(a->B, MATSEQAIJCUSPARSE));
179: if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));
181: if (reuse != MAT_REUSE_MATRIX && !a->spptr) PetscCallCXX(a->spptr = new Mat_MPIAIJCUSPARSE);
183: A->ops->assemblyend = MatAssemblyEnd_MPIAIJCUSPARSE;
184: A->ops->mult = MatMult_MPIAIJCUSPARSE;
185: A->ops->multadd = MatMultAdd_MPIAIJCUSPARSE;
186: A->ops->multtranspose = MatMultTranspose_MPIAIJCUSPARSE;
187: A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
188: A->ops->destroy = MatDestroy_MPIAIJCUSPARSE;
189: A->ops->zeroentries = MatZeroEntries_MPIAIJCUSPARSE;
190: A->ops->productsetfromoptions = MatProductSetFromOptions_MPIAIJBACKEND;
191: A->ops->setup = MatSetUp_MPI_HASH_CUSPARSE;
192: A->ops->getcurrentmemtype = MatGetCurrentMemType_MPIAIJ;
194: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJCUSPARSE));
195: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", MatMPIAIJGetLocalMatMerge_MPIAIJCUSPARSE));
196: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJCUSPARSE));
197: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCUSPARSESetFormat_C", MatCUSPARSESetFormat_MPIAIJCUSPARSE));
198: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJCUSPARSE));
199: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJCUSPARSE));
200: #if defined(PETSC_HAVE_HYPRE)
201: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpiaijcusparse_hypre_C", MatConvert_AIJ_HYPRE));
202: #endif
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat A)
207: {
208: PetscFunctionBegin;
209: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
210: PetscCall(MatCreate_MPIAIJ(A));
211: PetscCall(MatConvert_MPIAIJ_MPIAIJCUSPARSE(A, MATMPIAIJCUSPARSE, MAT_INPLACE_MATRIX, &A));
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: /*@
216: MatCreateAIJCUSPARSE - Creates a sparse matrix in `MATAIJCUSPARSE` (compressed row) format. This matrix will ultimately be pushed down
217: to NVIDIA GPUs and use the CuSPARSE library for calculations.
219: Collective
221: Input Parameters:
222: + comm - MPI communicator, set to `PETSC_COMM_SELF`
223: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
224: This value should be the same as the local size used in creating the
225: $y$ vector for the matrix-vector product $y = Ax$.
226: . n - This value should be the same as the local size used in creating the
227: $x$ vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
228: calculated if `N` is given) For square matrices `n` is almost always `m`.
229: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
230: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
231: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
232: (same value is used for all local rows)
233: . d_nnz - array containing the number of nonzeros in the various rows of the
234: DIAGONAL portion of the local submatrix (possibly different for each row)
235: or `NULL`, if `d_nz` is used to specify the nonzero structure.
236: The size of this array is equal to the number of local rows, i.e `m`.
237: For matrices you plan to factor you must leave room for the diagonal entry and
238: put in the entry even if it is zero.
239: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
240: submatrix (same value is used for all local rows).
241: - o_nnz - array containing the number of nonzeros in the various rows of the
242: OFF-DIAGONAL portion of the local submatrix (possibly different for
243: each row) or `NULL`, if `o_nz` is used to specify the nonzero
244: structure. The size of this array is equal to the number
245: of local rows, i.e `m`.
247: Output Parameter:
248: . A - the matrix
250: Level: intermediate
252: Notes:
253: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
254: MatXXXXSetPreallocation() paradigm instead of this routine directly.
255: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
257: The AIJ format, also called the
258: compressed row storage), is fully compatible with standard Fortran
259: storage. That is, the stored row and column indices can begin at
260: either one (as in Fortran) or zero.
262: When working with matrices for GPUs, it is often better to use the `MatSetPreallocationCOO()` and `MatSetValuesCOO()` paradigm rather than using this routine and `MatSetValues()`
264: .seealso: [](ch_matrices), `Mat`, `MATAIJCUSPARSE`, `MatCreate()`, `MatCreateAIJ()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MATMPIAIJCUSPARSE`
265: @*/
266: PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
267: {
268: return MatMPIAIJCUSPARSE_CUPM_t::CreateAIJ(comm, m, n, M, N, d_nz, d_nnz, o_nz, o_nnz, A);
269: }
271: /*MC
272: MATMPIAIJCUSPARSE - A matrix type to be used for sparse matrices on NVIDIA GPUs.
274: Options Database Keys:
275: + -mat_type mpiaijcusparse - sets the matrix type to `MATMPIAIJCUSPARSE`
276: . -mat_cusparse_storage_format csr - sets the storage format of diagonal and off-diagonal matrices. Other options include ell (ellpack) or hyb (hybrid).
277: . -mat_cusparse_mult_diag_storage_format csr - sets the storage format of diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
278: - -mat_cusparse_mult_offdiag_storage_format csr - sets the storage format of off-diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
280: Level: beginner
282: Notes:
283: These matrices can be in either CSR, ELL, or HYB format. The ELL and HYB formats require CUDA 4.2 or later.
285: All matrix calculations are performed on NVIDIA GPUs using the cuSPARSE library.
287: Uses 32-bit integers internally. If PETSc is configured `--with-64-bit-indices`, the integer row and column indices are stored on the GPU with `int`. It is unclear what happens
288: if some integer values passed in do not fit in `int`.
290: .seealso: [](ch_matrices), `Mat`, `MatCreateAIJCUSPARSE()`, `MATSEQAIJCUSPARSE`, `MATMPIAIJCUSPARSE`, `MatCreateSeqAIJCUSPARSE()`, `MatCUSPARSESetFormat()`, `MatCUSPARSEStorageFormat`, `MatCUSPARSEFormatOperation`
291: M*/
293: /*MC
294: MATAIJCUSPARSE - A matrix type to be used for sparse matrices on NVIDIA GPUs; it is as same as `MATSEQAIJCUSPARSE` on one MPI process and `MATMPIAIJCUSPARSE` on multiple processes.
296: Level: beginner
298: .seealso: [](ch_matrices), `Mat`, `MATAIJCUSPARSE`, `MATSEQAIJCUSPARSE`
299: M*/