/* matmpidensecuda.cu */
  1: #include "../matmpidensecupm.hpp"
  3: using namespace Petsc::mat::cupm;
  4: using Petsc::device::cupm::DeviceType;
  6: static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};
  8: /*MC
  9:   MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.
 11:   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
 12:   communicator, and `MATMPIDENSECUDA` otherwise.
 14:   Options Database Key:
 15: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
 16:                         `MatSetFromOptions()`
 18:   Level: beginner
 20: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
 21: `MATMPIDENSEHIP`, `MATDENSE`
 22: M*/
 24: /*MC
 25:   MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
 26:   GPUs.
 28:   Options Database Key:
 29: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
 30:                            `MatSetFromOptions()`
 32:   Level: beginner
 34: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
 35: `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
 36: M*/
 37: PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
 38: {
 39:   PetscFunctionBegin;
  /* Delegate construction to the shared CUPM implementation object instantiated
     for DeviceType::CUDA (see matmpidensecupm.hpp) */
 40:   PetscCall(mat_cupm.Create(A));
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }
 44: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
 45: {
 46:   PetscFunctionBegin;
  /* Conversion MPIDense -> MPIDenseCUDA; forwards to the device-generic CUPM
     conversion routine specialized for CUDA */
 47:   PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }
 51: /*@C
 52:   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
 54:   Collective
 56:   Input Parameters:
 57: + comm - MPI communicator
 58: . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
 59: . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
 60: . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
 61: . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
 62: - data - optional location of GPU matrix data. Pass `NULL` to have PETSc control matrix memory allocation.
 64:   Output Parameter:
 65: . A - the matrix
 67:   Level: intermediate
 69: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
 70: @*/
 71: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
 72: {
 73:   PetscFunctionBegin;
  /* Thin public wrapper: the templated CUPM creator does the actual work for
     the CUDA device type. `data`, if non-NULL, is presumably a device pointer
     (per the manual page above, PETSc allocates when NULL) */
 74:   PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }
 78: /*@C
 79:   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
 80:   array provided by the user. This is useful to avoid copying an array into a matrix.
 82:   Not Collective
 84:   Input Parameters:
 85: + mat   - the matrix
 86: - array - the array in column major order
 88:   Level: developer
 90:   Note:
 91:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
 93:   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
 94:   responsible for freeing this array; it will not be freed when the matrix is destroyed. The
 95:   array must have been allocated with `cudaMalloc()`.
 97: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
 98:           `MatDenseCUDAReplaceArray()`
 99: @*/
100: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
101: {
102:   PetscFunctionBegin;
  /* Temporarily substitute the user-provided device array; undone by
     MatDenseCUDAResetArray(). Delegates to the CUDA-specialized CUPM routine */
103:   PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
104:   PetscFunctionReturn(PETSC_SUCCESS);
105: }
107: /*@C
108:   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
109:   `MatDenseCUDAPlaceArray()`
111:   Not Collective
113:   Input Parameter:
114: . mat - the matrix
116:   Level: developer
118:   Note:
119:   You can only call this after a call to `MatDenseCUDAPlaceArray()`
121: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
122: @*/
123: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
124: {
125:   PetscFunctionBegin;
  /* Restore the array displaced by MatDenseCUDAPlaceArray(); CUPM backend
     specialized for CUDA does the bookkeeping */
126:   PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
127:   PetscFunctionReturn(PETSC_SUCCESS);
128: }
130: /*@C
131:   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
132:   with an array provided by the user. This is useful to avoid copying an array into a matrix.
134:   Not Collective
136:   Input Parameters:
137: + mat   - the matrix
138: - array - the array in column major order
140:   Level: developer
142:   Note:
143:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
145:   This permanently replaces the GPU array and frees the memory associated with the old GPU
146:   array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
147:   destroyed. The array should respect the matrix leading dimension.
149: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
150: @*/
151: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
152: {
153:   PetscFunctionBegin;
  /* Permanent replacement (old device array is freed, ownership of `array`
     transfers to the matrix — see manual page above); forwards to CUPM */
154:   PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }
158: /*@C
159:   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
160:   matrix.
162:   Not Collective
164:   Input Parameter:
165: . A - the matrix
167:   Output Parameter:
168: . a - the GPU array in column major order
170:   Level: developer
172:   Notes:
173:   The data on the GPU may not be updated due to operations done on the CPU. If you need updated
174:   data, use `MatDenseCUDAGetArray()`.
176:   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
178: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
179:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
180:           `MatDenseCUDARestoreArrayRead()`
181: @*/
182: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
183: {
184:   PetscFunctionBegin;
  /* Write-only access (no host->device sync per the note above); pair with
     MatDenseCUDARestoreArrayWrite(). Delegates to the CUPM backend */
185:   PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
186:   PetscFunctionReturn(PETSC_SUCCESS);
187: }
189: /*@C
190:   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
191:   `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
193:   Not Collective
195:   Input Parameters:
196: + A - the matrix
197: - a - the GPU array in column major order
199:   Level: developer
201: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
202: `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
203: @*/
204: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
205: {
206:   PetscFunctionBegin;
  /* Counterpart of MatDenseCUDAGetArrayWrite(); forwards to the
     CUDA-specialized CUPM restore routine */
207:   PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
208:   PetscFunctionReturn(PETSC_SUCCESS);
209: }
211: /*@C
212:   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
213:   `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
214:   no longer needed.
216:   Not Collective
218:   Input Parameter:
219: . A - the matrix
221:   Output Parameter:
222: . a - the GPU array in column major order
224:   Level: developer
226:   Note:
227:   Data may be copied to the GPU due to operations done on the CPU. If you need write only
228:   access, use `MatDenseCUDAGetArrayWrite()`.
230: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
231:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
232:           `MatDenseCUDARestoreArrayRead()`
233: @*/
234: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
235: {
236:   PetscFunctionBegin;
  /* Read-only access (may trigger CPU->GPU copy per the note above); pair
     with MatDenseCUDARestoreArrayRead(). Delegates to the CUPM backend */
237:   PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: /*@C
242:   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
243:   `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
245:   Not Collective
247:   Input Parameters:
248: + A - the matrix
249: - a - the GPU array in column major order
251:   Level: developer
253:   Note:
254:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
255:   access, use `MatDenseCUDAGetArrayWrite()`.
257: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
258:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
259: @*/
260: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
261: {
262:   PetscFunctionBegin;
  /* Counterpart of MatDenseCUDAGetArrayRead(); forwards to the
     CUDA-specialized CUPM restore routine */
263:   PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: /*@C
268:   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
269:   array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
271:   Not Collective
273:   Input Parameter:
274: . A - the matrix
276:   Output Parameter:
277: . a - the GPU array in column major order
279:   Level: developer
281:   Note:
282:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
283:   access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
284:   `MatDenseCUDAGetArrayRead()`.
286: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
287:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
288:           `MatDenseCUDARestoreArrayRead()`
289: @*/
290: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
291: {
292:   PetscFunctionBegin;
  /* Read-write access; pair with MatDenseCUDARestoreArray(). Delegates to the
     CUPM backend specialized for CUDA */
293:   PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
294:   PetscFunctionReturn(PETSC_SUCCESS);
295: }
297: /*@C
298:   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
299:   previously obtained with `MatDenseCUDAGetArray()`.
301:   Not Collective
 303:   Input Parameters:
 305: + A - the matrix
 306: - a - the GPU array in column major order
 307:   Level: developer
309: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
310:           `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
311: @*/
312: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
313: {
314:   PetscFunctionBegin;
  /* Counterpart of MatDenseCUDAGetArray(); forwards to the CUDA-specialized
     CUPM restore routine */
315:   PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }
319: /*@C
320:   MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
321:   `MATDENSECUDA` matrix
323:   Collective
325:   Input Parameters:
326: + A            - the matrix
327: - device_array - the array (or `NULL`)
329:   Level: intermediate
331: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
332: `MatSetValues()`, `MatDenseSetLDA()`
333: @*/
334: PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
335: {
336:   PetscFunctionBegin;
  /* Set (or let PETSc allocate, when NULL) the device storage for the matrix
     entries; delegates to the CUDA-specialized CUPM preallocation routine */
337:   PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
338:   PetscFunctionReturn(PETSC_SUCCESS);
339: }