Actual source code: matmpidensecuda.cu
  1: #include "../matmpidensecupm.hpp"

  3: using namespace Petsc::mat::cupm;
  4: using Petsc::device::cupm::DeviceType;

  6: static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

  8: /*MC
  9:   MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

 11:   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
 12:   communicator, and `MATMPIDENSECUDA` otherwise.

 14:   Options Database Key:
 15: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
 16:                         `MatSetFromOptions()`

 18:   Level: beginner

 20: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
 21: `MATMPIDENSEHIP`, `MATDENSE`
 22: M*/

 24: /*MC
 25:   MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
 26:   GPUs.

 28:   Options Database Key:
 29: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
 30:                            `MatSetFromOptions()`

 32:   Level: beginner

 34: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
 35: `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
 36: M*/
 37: PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
 38: {
 39:   PetscFunctionBegin;
 40:   PetscCall(mat_cupm.Create(A));
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }

 44: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
 45: {
 46:   PetscFunctionBegin;
 47:   PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: /*@C
 52:   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

 54:   Collective

 56:   Input Parameters:
 57: + comm - MPI communicator
 58: . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
 59: . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
 60: . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
 61: . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
 62: - data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.

 64:   Output Parameter:
 65: . A - the matrix

 67:   Level: intermediate

 69: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
 70: @*/
 71: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
 72: {
 73:   PetscFunctionBegin;
 74:   PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: /*@C
 79:   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
 80:   array provided by the user. This is useful to avoid copying an array into a matrix.

 82:   Not Collective

 84:   Input Parameters:
 85: + mat   - the matrix
 86: - array - the array in column major order

 88:   Level: developer

 90:   Note:
 91:   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
 92:   responsible for freeing this array; it will not be freed when the matrix is destroyed. The
 93:   array must have been allocated with `cudaMalloc()`.

 95: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
 96:           `MatDenseCUDAReplaceArray()`
 97: @*/
 98: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
 99: {
100:   PetscFunctionBegin;
101:   PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
102:   PetscFunctionReturn(PETSC_SUCCESS);
103: }

105: /*@C
106:   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
107:   `MatDenseCUDAPlaceArray()`

109:   Not Collective

111:   Input Parameter:
112: . mat - the matrix

114:   Level: developer

116:   Note:
117:   You can only call this after a call to `MatDenseCUDAPlaceArray()`

119: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
120: @*/
121: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
122: {
123:   PetscFunctionBegin;
124:   PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: /*@C
129:   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
130:   with an array provided by the user. This is useful to avoid copying an array into a matrix.

132:   Not Collective

134:   Input Parameters:
135: + mat   - the matrix
136: - array - the array in column major order

138:   Level: developer

140:   Note:
141:   This permanently replaces the GPU array and frees the memory associated with the old GPU
142:   array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
143:   destroyed. The array should respect the matrix leading dimension.

145: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
146: @*/
147: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
148: {
149:   PetscFunctionBegin;
150:   PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
151:   PetscFunctionReturn(PETSC_SUCCESS);
152: }

154: /*@C
155:   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
156:   matrix.

158:   Not Collective

160:   Input Parameter:
161: . A - the matrix

163:   Output Parameter:
164: . a - the GPU array in column major order

166:   Level: developer

168:   Notes:
169:   The data on the GPU may not be updated due to operations done on the CPU. If you need updated
170:   data, use `MatDenseCUDAGetArray()`.

172:   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

174: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
175:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
176:           `MatDenseCUDARestoreArrayRead()`
177: @*/
178: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
179: {
180:   PetscFunctionBegin;
181:   PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: /*@C
186:   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
187:   `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

189:   Not Collective

191:   Input Parameters:
192: + A - the matrix
193: - a - the GPU array in column major order

195:   Level: developer

197: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
198: `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
199: @*/
200: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
201: {
202:   PetscFunctionBegin;
203:   PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
204:   PetscFunctionReturn(PETSC_SUCCESS);
205: }

207: /*@C
208:   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
209:   `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
210:   no longer needed.

212:   Not Collective

214:   Input Parameter:
215: . A - the matrix

217:   Output Parameter:
218: . a - the GPU array in column major order

220:   Level: developer

222:   Note:
223:   Data may be copied to the GPU due to operations done on the CPU. If you need write only
224:   access, use `MatDenseCUDAGetArrayWrite()`.

226: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
227:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
228:           `MatDenseCUDARestoreArrayRead()`
229: @*/
230: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
231: {
232:   PetscFunctionBegin;
233:   PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }

237: /*@C
238:   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
239:   `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

241:   Not Collective

243:   Input Parameters:
244: + A - the matrix
245: - a - the GPU array in column major order

247:   Level: developer

249:   Note:
250:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
251:   access, use `MatDenseCUDAGetArrayWrite()`.

253: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
254:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
255: @*/
256: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
257: {
258:   PetscFunctionBegin;
259:   PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }

263: /*@C
264:   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
265:   array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

267:   Not Collective

269:   Input Parameter:
270: . A - the matrix

272:   Output Parameter:
273: . a - the GPU array in column major order

275:   Level: developer

277:   Note:
278:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
279:   access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
280:   `MatDenseCUDAGetArrayRead()`.

282: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
283:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
284:           `MatDenseCUDARestoreArrayRead()`
285: @*/
286: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
287: {
288:   PetscFunctionBegin;
289:   PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: /*@C
294:   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
295:   previously obtained with `MatDenseCUDAGetArray()`.

297:   Not Collective

299:   Level: developer

301:   Input Parameters:
302: + A - the matrix
303: - a - the GPU array in column major order

305: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
306:           `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
307: @*/
308: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
309: {
310:   PetscFunctionBegin;
311:   PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
312:   PetscFunctionReturn(PETSC_SUCCESS);
313: }

315: /*@C
316:   MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
317:   `MATDENSECUDA` matrix

319:   Collective

321:   Input Parameters:
322: + A            - the matrix
323: - device_array - the array (or `NULL`)

325:   Level: intermediate

327: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
328: `MatSetValues()`, `MatDenseSetLDA()`
329: @*/
330: PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
331: {
332:   PetscFunctionBegin;
333:   PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
334:   PetscFunctionReturn(PETSC_SUCCESS);
335: }