Actual source code: matmpidensecuda.cu

  1: #include "../matmpidensecupm.hpp"

  3: using namespace Petsc::mat::cupm;
  4: using Petsc::device::cupm::DeviceType;

  6: static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

  8: /*MC
  9:   MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

 11:   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
 12:   communicator, and `MATMPIDENSECUDA` otherwise.

 14:   Options Database Key:
 15: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
 16:                         `MatSetFromOptions()`

 18:   Level: beginner

 20: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
 21: `MATMPIDENSEHIP`, `MATDENSE`
 22: M*/

 24: /*MC
 25:   MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
 26:   GPUs.

 28:   Options Database Key:
 29: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
 30:                            `MatSetFromOptions()`

 32:   Level: beginner

 34: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
 35: `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
 36: M*/
 37: PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
 38: {
 39:   PetscFunctionBegin;
 40:   PetscCall(mat_cupm.Create(A));
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }

 44: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
 45: {
 46:   PetscFunctionBegin;
 47:   PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: /*@C
 52:   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

 54:   Collective

 56:   Input Parameters:
 57: + comm - MPI communicator
 58: . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
 59: . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
 60: . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
 61: . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
 62: - data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.

 64:   Output Parameter:
 65: . A - the matrix

 67:   Level: intermediate

 69: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
 70: @*/
 71: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
 72: {
 73:   PetscFunctionBegin;
 74:   PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: /*@C
 79:   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
 80:   array provided by the user. This is useful to avoid copying an array into a matrix.

 82:   Not Collective

 84:   Input Parameters:
 85: + mat   - the matrix
 86: - array - the array in column major order

 88:   Level: developer

 90:   Note:
 91:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

 93:   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
 94:   responsible for freeing this array; it will not be freed when the matrix is destroyed. The
 95:   array must have been allocated with `cudaMalloc()`.

 97: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
 98:           `MatDenseCUDAReplaceArray()`
 99: @*/
100: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
101: {
102:   PetscFunctionBegin;
103:   PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
104:   PetscFunctionReturn(PETSC_SUCCESS);
105: }

107: /*@C
108:   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
109:   `MatDenseCUDAPlaceArray()`

111:   Not Collective

113:   Input Parameter:
114: . mat - the matrix

116:   Level: developer

118:   Note:
119:   You can only call this after a call to `MatDenseCUDAPlaceArray()`

121: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
122: @*/
123: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
124: {
125:   PetscFunctionBegin;
126:   PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
127:   PetscFunctionReturn(PETSC_SUCCESS);
128: }

130: /*@C
131:   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
132:   with an array provided by the user. This is useful to avoid copying an array into a matrix.

134:   Not Collective

136:   Input Parameters:
137: + mat   - the matrix
138: - array - the array in column major order

140:   Level: developer

142:   Note:
143:   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

145:   This permanently replaces the GPU array and frees the memory associated with the old GPU
146:   array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
147:   destroyed. The array should respect the matrix leading dimension.

149: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
150: @*/
151: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
152: {
153:   PetscFunctionBegin;
154:   PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }

158: /*@C
159:   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
160:   matrix.

162:   Not Collective

164:   Input Parameter:
165: . A - the matrix

167:   Output Parameter:
168: . a - the GPU array in column major order

170:   Level: developer

172:   Notes:
173:   The data on the GPU may not be updated due to operations done on the CPU. If you need updated
174:   data, use `MatDenseCUDAGetArray()`.

176:   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

178: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
179:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
180:           `MatDenseCUDARestoreArrayRead()`
181: @*/
182: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
183: {
184:   PetscFunctionBegin;
185:   PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
186:   PetscFunctionReturn(PETSC_SUCCESS);
187: }

189: /*@C
190:   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
191:   `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

193:   Not Collective

195:   Input Parameters:
196: + A - the matrix
197: - a - the GPU array in column major order

199:   Level: developer

201: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
202: `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
203: @*/
204: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
205: {
206:   PetscFunctionBegin;
207:   PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
208:   PetscFunctionReturn(PETSC_SUCCESS);
209: }

211: /*@C
212:   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
213:   `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
214:   no longer needed.

216:   Not Collective

218:   Input Parameter:
219: . A - the matrix

221:   Output Parameter:
222: . a - the GPU array in column major order

224:   Level: developer

226:   Note:
227:   Data may be copied to the GPU due to operations done on the CPU. If you need write only
228:   access, use `MatDenseCUDAGetArrayWrite()`.

230: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
231:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
232:           `MatDenseCUDARestoreArrayRead()`
233: @*/
234: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
235: {
236:   PetscFunctionBegin;
237:   PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: /*@C
242:   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
243:   `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

245:   Not Collective

247:   Input Parameters:
248: + A - the matrix
249: - a - the GPU array in column major order

251:   Level: developer

253:   Note:
254:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
255:   access, use `MatDenseCUDAGetArrayWrite()`.

257: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
258:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
259: @*/
260: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
261: {
262:   PetscFunctionBegin;
263:   PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }

267: /*@C
268:   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
269:   array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

271:   Not Collective

273:   Input Parameter:
274: . A - the matrix

276:   Output Parameter:
277: . a - the GPU array in column major order

279:   Level: developer

281:   Note:
282:   Data can be copied to the GPU due to operations done on the CPU. If you need write only
283:   access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
284:   `MatDenseCUDAGetArrayRead()`.

286: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
287:           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
288:           `MatDenseCUDARestoreArrayRead()`
289: @*/
290: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
291: {
292:   PetscFunctionBegin;
293:   PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
294:   PetscFunctionReturn(PETSC_SUCCESS);
295: }

297: /*@C
298:   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
299:   previously obtained with `MatDenseCUDAGetArray()`.

301:   Not Collective

303:   Level: developer

305:   Input Parameters:
306: + A - the matrix
307: - a - the GPU array in column major order

309: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
310:           `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
311: @*/
312: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
313: {
314:   PetscFunctionBegin;
315:   PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }

319: /*@C
320:   MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
321:   `MATDENSECUDA` matrix

323:   Collective

325:   Input Parameters:
326: + A            - the matrix
327: - device_array - the array (or `NULL`)

329:   Level: intermediate

331: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
332: `MatSetValues()`, `MatDenseSetLDA()`
333: @*/
334: PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
335: {
336:   PetscFunctionBegin;
337:   PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
338:   PetscFunctionReturn(PETSC_SUCCESS);
339: }