Actual source code: vecmpicupm.cu

  1: #include "../vecmpicupm.hpp" /*I <petscvec.h> I*/
  2: #include "../vecmpicupm_impl.hpp"

  4: using namespace Petsc::vec::cupm;
  5: using Petsc::device::cupm::DeviceType;

  7: template class impl::VecMPI_CUPM<DeviceType::CUDA>;

  9: static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};

 11: /*MC
 12:   VECCUDA - VECCUDA = "cuda" - A `VECSEQCUDA` on a single-process MPI communicator, and `VECMPICUDA`
 13:   otherwise.

 15:   Options Database Keys:
 16: . -vec_type cuda - sets the vector type to `VECCUDA` during a call to `VecSetFromOptions()`

 18:   Level: beginner

 20: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQCUDA`,
 21: `VECMPICUDA`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECHIP`
 22: M*/

 24: PetscErrorCode VecCreate_CUDA(Vec v)
 25: {
 26:   PetscFunctionBegin;
 27:   PetscCall(VecMPI_CUDA.Create_CUPM(v));
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }
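
// Illustrative usage sketch, not part of the PETSc sources: VecCreate_CUDA() is the
// constructor registered for the VECCUDA type, so user code normally reaches it through
// VecSetType() (or -vec_type cuda with VecSetFromOptions()) rather than by calling it
// directly. The helper name below is hypothetical.
static PetscErrorCode ExampleUseVECCUDA(void)
{
  Vec v;

  PetscFunctionBegin;
  PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
  PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
  PetscCall(VecSetType(v, VECCUDA)); // picks VECSEQCUDA or VECMPICUDA from the communicator size
  PetscCall(VecSet(v, 1.0));
  PetscCall(VecDestroy(&v));
  PetscFunctionReturn(PETSC_SUCCESS);
}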

 31: /*MC
 32:   VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

 34:   Options Database Keys:
 35: . -vec_type mpicuda - sets the vector type to `VECMPICUDA` during a call to `VecSetFromOptions()`

 37:   Level: beginner

 39: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`,
 40: `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECSEQHIP`, `VECMPIHIP`
 41: M*/

 43: PetscErrorCode VecCreate_MPICUDA(Vec v)
 44: {
 45:   PetscFunctionBegin;
 46:   PetscCall(VecMPI_CUDA.Create(v));
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }
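
// Illustrative usage sketch, not part of the PETSc sources: VECMPICUDA is usually selected
// at runtime, e.g. via -vec_type mpicuda, in which case VecSetFromOptions() ends up
// invoking VecCreate_MPICUDA() above. The helper name below is hypothetical.
static PetscErrorCode ExampleUseVECMPICUDA(void)
{
  Vec v;

  PetscFunctionBegin;
  PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
  PetscCall(VecSetSizes(v, PETSC_DECIDE, 128));
  PetscCall(VecSetFromOptions(v)); // -vec_type mpicuda selects this implementation
  PetscCall(VecSet(v, 2.0));
  PetscCall(VecDestroy(&v));
  PetscFunctionReturn(PETSC_SUCCESS);
}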

// Converts a standard VECMPI vector to VECMPICUDA in place.
 50: PetscErrorCode VecConvert_MPI_MPICUDA_inplace(Vec v)
 51: {
 52:   PetscFunctionBegin;
 53:   PetscCall(VecMPI_CUDA.Convert_IMPL_IMPLCUPM(v));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

// Private accessor: returns the vector's host and device arrays together with its current offload mask.
 57: PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
 58: {
 59:   PetscDeviceContext dctx;

 61:   PetscFunctionBegin;
 63:   PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
 64:   PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
 65:   PetscFunctionReturn(PETSC_SUCCESS);
 66: }

 68: // PetscClangLinter pragma disable: -fdoc-internal-linkage
 69: /*@
 70:   VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.

 72:   Collective, Possibly Synchronous

 74:   Input Parameters:
 75: + comm - the MPI communicator to use
 76: . n    - local vector length (or `PETSC_DECIDE` to have it calculated if `N` is given)
 77: - N    - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)

 79:   Output Parameter:
 80: . v - the vector

 82:   Notes:
 83:   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
 84:   existing vector.

 86:   This function may initialize `PetscDevice`, which may incur a device synchronization.

 88:   Level: intermediate

 90: .seealso: `VecCreateMPICUDAWithArray()`, `VecCreateMPICUDAWithArrays()`, `VecCreateSeqCUDA()`,
 91: `VecCreateSeq()`, `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
 92: `VecCreateGhost()`, `VecCreateMPIWithArray()`, `VecCreateGhostWithArray()`, `VecMPISetGhost()`
 93: @*/
 94: PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
 95: {
 96:   PetscFunctionBegin;
 97:   PetscAssertPointer(v, 4);
 98:   PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
 99:   PetscFunctionReturn(PETSC_SUCCESS);
100: }
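
// Illustrative usage sketch, not part of the PETSc sources: creating a parallel CUDA vector
// directly with VecCreateMPICUDA() and letting PETSc split the global length across the
// ranks of the communicator. The helper name and sizes are hypothetical.
static PetscErrorCode ExampleVecCreateMPICUDA(MPI_Comm comm)
{
  Vec x;

  PetscFunctionBegin;
  PetscCall(VecCreateMPICUDA(comm, PETSC_DECIDE, 1000, &x)); // local length chosen by PETSc
  PetscCall(VecSet(x, 1.0));                                 // executed on the device
  PetscCall(VecDestroy(&x));
  PetscFunctionReturn(PETSC_SUCCESS);
}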

102: // PetscClangLinter pragma disable: -fdoc-internal-linkage
103: /*@C
104:   VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
105:   user provides the complete array space to store the vector values.

107:   Collective, Possibly Synchronous

109:   Input Parameters:
110: + comm     - the MPI communicator to use
111: . bs       - block size, same meaning as `VecSetBlockSize()`
112: . n        - local vector length, cannot be `PETSC_DECIDE`
113: . N        - global vector length (or `PETSC_DECIDE` to have it calculated)
114: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
115: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)

117:   Output Parameter:
118: . v - the vector

120:   Notes:
121:   See `VecCreateSeqCUDAWithArrays()` for further discussion; this routine has identical
122:   semantics.

124:   Level: intermediate

126: .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArrays()`, `VecCreateMPIWithArray()`,
127: `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
128: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
129: @*/
130: PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
131: {
132:   PetscFunctionBegin;
133:   PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
134:   PetscFunctionReturn(PETSC_SUCCESS);
135: }
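
// Illustrative usage sketch, not part of the PETSc sources: handing both a host and a device
// buffer to VecCreateMPICUDAWithArrays(). It assumes the CUDA runtime and the PetscCallCUDA()
// error-checking macro are available; the helper name and the local length are hypothetical.
static PetscErrorCode ExampleVecCreateMPICUDAWithArrays(MPI_Comm comm)
{
  const PetscInt n = 32; // local length; cannot be PETSC_DECIDE for this routine
  PetscScalar   *host, *device;
  Vec            x;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(n, &host));
  PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
  PetscCall(VecCreateMPICUDAWithArrays(comm, 1, n, PETSC_DECIDE, host, device, &x));
  // ... use x; the vector does not take ownership of either buffer ...
  PetscCall(VecDestroy(&x));
  PetscCallCUDA(cudaFree(device));
  PetscCall(PetscFree(host));
  PetscFunctionReturn(PETSC_SUCCESS);
}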

137: // PetscClangLinter pragma disable: -fdoc-internal-linkage
138: /*@C
139:   VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
140:   user provides the device array space to store the vector values.

142:   Collective

144:   Input Parameters:
145: + comm  - the MPI communicator to use
146: . bs    - block size, same meaning as `VecSetBlockSize()`
147: . n     - local vector length, cannot be `PETSC_DECIDE`
148: . N     - global vector length (or `PETSC_DECIDE` to have it calculated)
149: - gpuarray - the user-provided GPU array to store the vector values

151:   Output Parameter:
152: . v - the vector

154:   Notes:
155:   See `VecCreateSeqCUDAWithArray()` for further discussion; this routine has identical
156:   semantics.

158:   Level: intermediate

160: .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArray()`, `VecCreateMPIWithArray()`,
161: `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
162: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
163: @*/
164: PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
165: {
166:   PetscFunctionBegin;
167:   PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }
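
// Illustrative usage sketch, not part of the PETSc sources: wrapping an existing GPU buffer
// in a Vec with VecCreateMPICUDAWithArray(). It assumes the CUDA runtime and the
// PetscCallCUDA() macro are available; the helper name and the local length are hypothetical.
static PetscErrorCode ExampleVecCreateMPICUDAWithArray(MPI_Comm comm)
{
  const PetscInt n = 32; // local length; cannot be PETSC_DECIDE for this routine
  PetscScalar   *device;
  Vec            x;

  PetscFunctionBegin;
  PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
  PetscCall(VecCreateMPICUDAWithArray(comm, 1, n, PETSC_DECIDE, device, &x));
  PetscCall(VecSet(x, 0.0)); // operates directly on the user-provided device buffer
  PetscCall(VecDestroy(&x)); // the user-provided buffer is not freed here
  PetscCallCUDA(cudaFree(device));
  PetscFunctionReturn(PETSC_SUCCESS);
}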