Actual source code: matmpidensecuda.cu
#include "../matmpidensecupm.hpp"

using namespace Petsc::mat::cupm;
using Petsc::device::cupm::DeviceType;

static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

/*MC
  MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

  This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
  communicator, and `MATMPIDENSECUDA` otherwise.

  Options Database Key:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
M*/
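/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): create a matrix whose type is chosen at runtime, so that passing
  -mat_type densecuda on the command line selects `MATDENSECUDA` via `MatSetFromOptions()`.
*/
static PetscErrorCode ExampleDenseCUDAFromOptions(MPI_Comm comm, PetscInt M, PetscInt N, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, PETSC_DECIDE, PETSC_DECIDE, M, N));
  PetscCall(MatSetFromOptions(*A)); /* honors -mat_type densecuda */
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(PETSC_SUCCESS);
}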
/*MC
  MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

  Options Database Key:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
M*/
PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Create(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
  PetscFunctionReturn(PETSC_SUCCESS);
}
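/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): convert an assembled `MATMPIDENSE` matrix to `MATMPIDENSECUDA` through the public
  `MatConvert()` interface, which dispatches to the conversion routine above.
*/
static PetscErrorCode ExampleConvertToDenseCUDA(Mat A, Mat *B)
{
  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATMPIDENSECUDA, MAT_INITIAL_MATRIX, B)); /* B is a new GPU-backed copy of A */
  PetscFunctionReturn(PETSC_SUCCESS);
}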
/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
- data - optional location of GPU matrix data. Pass `NULL` to have PETSc control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
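/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): create a distributed dense CUDA matrix with PETSc-managed device storage by
  passing `NULL` for `data` and letting PETSc split the rows across the communicator.
*/
static PetscErrorCode ExampleCreateDenseCUDA(MPI_Comm comm, PetscInt M, PetscInt N, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateDenseCUDA(comm, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, A));
  /* entries can now be set with MatSetValues(), followed by assembly */
  PetscCall(MatAssemblyBegin(*A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}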
/*@C
  MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
  responsible for freeing this array; it will not be freed when the matrix is destroyed. The
  array must have been allocated with `cudaMalloc()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call
  to `MatDenseCUDAPlaceArray()`.

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDenseCUDAPlaceArray()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
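/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): temporarily back an assembled `MATDENSECUDA` matrix with a user-owned buffer
  allocated by `cudaMalloc()`, then restore the original array. The local storage holds all `N`
  global columns of the local row block, with leading dimension `lda`.
*/
static PetscErrorCode ExamplePlaceAndResetArray(Mat mat)
{
  PetscScalar *devarr;
  PetscInt     N, lda;

  PetscFunctionBegin;
  PetscCall(MatGetSize(mat, NULL, &N));
  PetscCall(MatDenseGetLDA(mat, &lda));
  PetscCallCUDA(cudaMalloc((void **)&devarr, (size_t)lda * N * sizeof(PetscScalar)));
  PetscCall(MatDenseCUDAPlaceArray(mat, devarr));
  /* ... use mat, now backed by devarr ... */
  PetscCall(MatDenseCUDAResetArray(mat));
  PetscCallCUDA(cudaFree(devarr)); /* the user frees the placed array; PETSc will not */
  PetscFunctionReturn(PETSC_SUCCESS);
}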
/*@C
  MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
  with an array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  This permanently replaces the GPU array and frees the memory associated with the old GPU
  array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
  destroyed. The array should respect the matrix leading dimension.

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
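/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): permanently hand a freshly allocated device buffer to the matrix. Unlike
  `MatDenseCUDAPlaceArray()`, ownership transfers to the matrix, so the caller must NOT free the
  buffer afterwards.
*/
static PetscErrorCode ExampleReplaceArray(Mat mat)
{
  PetscScalar *devarr;
  PetscInt     N, lda;

  PetscFunctionBegin;
  PetscCall(MatGetSize(mat, NULL, &N));
  PetscCall(MatDenseGetLDA(mat, &lda));
  PetscCallCUDA(cudaMalloc((void **)&devarr, (size_t)lda * N * sizeof(PetscScalar)));
  PetscCall(MatDenseCUDAReplaceArray(mat, devarr)); /* devarr is now owned by mat and freed with it */
  PetscFunctionReturn(PETSC_SUCCESS);
}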
/*@C
  MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
  matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated
  data, use `MatDenseCUDAGetArray()`.

  The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
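/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): overwrite the entire local buffer with zeros directly on the GPU. Write access
  is appropriate here because no previous values are read, so no stale data needs to be synced.
*/
static PetscErrorCode ExampleZeroOnDevice(Mat A)
{
  PetscScalar *a;
  PetscInt     N, lda;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(A, &a));
  /* zero bytes are a valid representation of 0.0 for PetscScalar */
  PetscCallCUDA(cudaMemset(a, 0, (size_t)lda * N * sizeof(PetscScalar)));
  PetscCall(MatDenseCUDARestoreArrayWrite(A, &a));
  PetscFunctionReturn(PETSC_SUCCESS);
}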
/*@C
  MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
  no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data may be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
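/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): copy the first local column of the matrix from the device to a host buffer of
  length `m` (the number of local rows). Read access triggers a host-to-device update first if
  the CPU copy is newer.
*/
static PetscErrorCode ExampleCopyColumnToHost(Mat A, PetscScalar *host)
{
  const PetscScalar *a;
  PetscInt           m;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCall(MatDenseCUDAGetArrayRead(A, &a));
  PetscCallCUDA(cudaMemcpy(host, a, (size_t)m * sizeof(PetscScalar), cudaMemcpyDeviceToHost));
  PetscCall(MatDenseCUDARestoreArrayRead(A, &a));
  PetscFunctionReturn(PETSC_SUCCESS);
}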
/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
  array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
  `MatDenseCUDAGetArrayRead()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
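/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper and
  kernel names are hypothetical, and a real-valued `PetscScalar` build is assumed): scale every
  entry of the local buffer in place with a tiny hand-written CUDA kernel. Read-write access
  keeps the CPU and GPU copies coherent before and after the kernel runs.
*/
static __global__ void ExampleScaleKernel(PetscScalar *a, PetscInt n, PetscScalar alpha)
{
  const PetscInt i = blockIdx.x * blockDim.x + threadIdx.x;

  if (i < n) a[i] *= alpha;
}

static PetscErrorCode ExampleScaleInPlace(Mat A, PetscScalar alpha)
{
  PetscScalar *a;
  PetscInt     N, lda;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseCUDAGetArray(A, &a));
  {
    const PetscInt n = lda * N; /* includes any leading-dimension padding */

    ExampleScaleKernel<<<(n + 255) / 256, 256>>>(a, n, alpha);
    PetscCallCUDA(cudaGetLastError());
  }
  PetscCall(MatDenseCUDARestoreArray(A, &a));
  PetscFunctionReturn(PETSC_SUCCESS);
}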
/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
  previously obtained with `MatDenseCUDAGetArray()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
  `MATDENSECUDA` matrix

  Collective

  Input Parameters:
+ A            - the matrix
- device_array - the array (or `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`, `MatSetValues()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
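/*
  A minimal usage sketch (editor's illustration, not part of the PETSc source; the helper name is
  hypothetical): build a `MATDENSECUDA` matrix step by step and let PETSc allocate the device
  storage by passing `NULL` as the device array.
*/
static PetscErrorCode ExamplePreallocateDenseCUDA(MPI_Comm comm, PetscInt M, PetscInt N, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, PETSC_DECIDE, PETSC_DECIDE, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  PetscCall(MatDenseCUDASetPreallocation(*A, NULL)); /* NULL: PETSc allocates the GPU buffer */
  PetscFunctionReturn(PETSC_SUCCESS);
}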