Actual source code: mpisellcuda.cu
1: #include <petscconf.h>
2: #include <petscdevice.h>
3: #include <../src/mat/impls/sell/mpi/mpisell.h>
5: static PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
6: {
7: Mat_MPISELL *b = (Mat_MPISELL *)B->data;
9: PetscFunctionBegin;
10: PetscCall(PetscLayoutSetUp(B->rmap));
11: PetscCall(PetscLayoutSetUp(B->cmap));
13: if (!B->preallocated) {
14: /* Explicitly create 2 MATSEQSELLCUDA matrices. */
15: PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
16: PetscCall(MatBindToCPU(b->A, B->boundtocpu));
17: PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
18: PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
19: PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
20: PetscCall(MatBindToCPU(b->B, B->boundtocpu));
21: PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
22: PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
23: }
24: PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
25: PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
26: B->preallocated = PETSC_TRUE;
27: B->was_assembled = PETSC_FALSE;
28: B->assembled = PETSC_FALSE;
29: PetscFunctionReturn(PETSC_SUCCESS);
30: }
32: static PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems *)
33: {
34: return PETSC_SUCCESS;
35: }
37: static PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
38: {
39: PetscFunctionBegin;
40: PetscCall(MatAssemblyEnd_MPISELL(A, mode));
41: if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQCUDA));
42: PetscFunctionReturn(PETSC_SUCCESS);
43: }
45: static PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
46: {
47: PetscFunctionBegin;
48: PetscCall(MatDestroy_MPISELL(A));
49: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
50: PetscFunctionReturn(PETSC_SUCCESS);
51: }
53: PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
54: {
55: Mat_MPISELL *a;
56: Mat A;
58: PetscFunctionBegin;
59: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
60: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
61: else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
62: A = *newmat;
63: A->boundtocpu = PETSC_FALSE;
64: PetscCall(PetscFree(A->defaultvectype));
65: PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));
67: a = (Mat_MPISELL *)A->data;
68: if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
69: if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
70: if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));
72: A->ops->assemblyend = MatAssemblyEnd_MPISELLCUDA;
73: A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
74: A->ops->destroy = MatDestroy_MPISELLCUDA;
76: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
77: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
82: {
83: PetscFunctionBegin;
84: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
85: PetscCall(MatCreate_MPISELL(A));
86: PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
87: PetscFunctionReturn(PETSC_SUCCESS);
88: }
90: /*@
91: MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
92: This matrix will ultimately pushed down to NVIDIA GPUs.
94: Collective
96: Input Parameters:
97: + comm - MPI communicator, set to `PETSC_COMM_SELF`
98: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
99: This value should be the same as the local size used in creating the
100: y vector for the matrix-vector product y = Ax.
101: . n - This value should be the same as the local size used in creating the
102: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
103: calculated if `N` is given) For square matrices `n` is almost always `m`.
104: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
105: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
106: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
107: (same value is used for all local rows)
108: . d_nnz - array containing the number of nonzeros in the various rows of the
109: DIAGONAL portion of the local submatrix (possibly different for each row)
110: or `NULL`, if `d_nz` is used to specify the nonzero structure.
111: The size of this array is equal to the number of local rows, i.e `m`.
112: For matrices you plan to factor you must leave room for the diagonal entry and
113: put in the entry even if it is zero.
114: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
115: submatrix (same value is used for all local rows).
116: - o_nnz - array containing the number of nonzeros in the various rows of the
117: OFF-DIAGONAL portion of the local submatrix (possibly different for
118: each row) or `NULL`, if `o_nz` is used to specify the nonzero
119: structure. The size of this array is equal to the number
120: of local rows, i.e `m`.
122: Output Parameter:
123: . A - the matrix
125: Level: intermediate
127: Notes:
128: If `nnz` is given then `nz` is ignored
130: Specify the preallocated storage with either `nz` or `nnz` (not both).
131: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
132: allocation.
134: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
135: @*/
136: PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
137: {
138: PetscMPIInt size;
140: PetscFunctionBegin;
141: PetscCall(MatCreate(comm, A));
142: PetscCall(MatSetSizes(*A, m, n, M, N));
143: PetscCallMPI(MPI_Comm_size(comm, &size));
144: if (size > 1) {
145: PetscCall(MatSetType(*A, MATMPISELLCUDA));
146: PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
147: } else {
148: PetscCall(MatSetType(*A, MATSEQSELLCUDA));
149: PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
150: }
151: PetscFunctionReturn(PETSC_SUCCESS);
152: }
154: /*MC
155: MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.
157: Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.
159: This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
160: and `MATMPISELLCUDA` otherwise. As a result, for single process communicators,
161: `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
162: for communicators controlling multiple processes. It is recommended that you call both of
163: the above preallocation routines for simplicity.
165: Options Database Key:
166: . -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to MatSetFromOptions()
168: Level: beginner
170: .seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
171: M*/