Actual source code: vecmpicupm.cu
1: #include "../vecmpicupm.hpp" /*I <petscvec.h> I*/
2: #include "../vecmpicupm_impl.hpp"
4: using namespace Petsc::vec::cupm;
5: using Petsc::device::cupm::DeviceType;
7: template class impl::VecMPI_CUPM<DeviceType::CUDA>;
9: static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};
11: /*MC
12: VECCUDA - VECCUDA = "cuda" - A `VECSEQCUDA` on a single-process MPI communicator, and `VECMPICUDA`
13: otherwise.
15: Options Database Keys:
16: . -vec_type cuda - sets the vector type to `VECCUDA` during a call to `VecSetFromOptions()`
18: Level: beginner
20: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQCUDA`,
21: `VECMPICUDA`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECHIP`
22: M*/
24: PetscErrorCode VecCreate_CUDA(Vec v)
25: {
26: PetscFunctionBegin;
27: PetscCall(VecMPI_CUDA.Create_CUPM(v));
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: /*MC
32: VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA
34: Options Database Keys:
35: . -vec_type mpicuda - sets the vector type to `VECMPICUDA` during a call to `VecSetFromOptions()`
37: Level: beginner
39: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`,
40: `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECSEQHIP`, `VECMPIHIP`
41: M*/
43: PetscErrorCode VecCreate_MPICUDA(Vec v)
44: {
45: PetscFunctionBegin;
46: PetscCall(VecMPI_CUDA.Create(v));
47: PetscFunctionReturn(PETSC_SUCCESS);
48: }
50: PetscErrorCode VecConvert_MPI_MPICUDA_inplace(Vec v)
51: {
52: PetscFunctionBegin;
53: PetscCall(VecMPI_CUDA.Convert_IMPL_IMPLCUPM(v));
54: PetscFunctionReturn(PETSC_SUCCESS);
55: }
57: PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
58: {
59: PetscDeviceContext dctx;
61: PetscFunctionBegin;
63: PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
64: PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
68: // PetscClangLinter pragma disable: -fdoc-internal-linkage
69: /*@
70: VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.
72: Collective, Possibly Synchronous
74: Input Parameters:
75: + comm - the MPI communicator to use
76: . n - local vector length (or `PETSC_DECIDE` to have calculated if `N` is given)
77: - N - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
79: Output Parameter:
80: . v - the vector
82: Notes:
83: Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
84: existing vector.
86: This function may initialize `PetscDevice`, which may incur a device synchronization.
88: Level: intermediate
90: .seealso: `VecCreateMPICUDAWithArray()`, `VecCreateMPICUDAWithArrays()`, `VecCreateSeqCUDA(),
91: `VecCreateSeq()`, `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
92: `VecCreateGhost()`, `VecCreateMPIWithArray()`, `VecCreateGhostWithArray()`, `VecMPISetGhost()`
93: @*/
94: PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
95: {
96: PetscFunctionBegin;
97: PetscAssertPointer(v, 4);
98: PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
99: PetscFunctionReturn(PETSC_SUCCESS);
100: }
102: // PetscClangLinter pragma disable: -fdoc-internal-linkage
103: /*@C
104: VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
105: user provides the complete array space to store the vector values.
107: Collective, Possibly Synchronous
109: Input Parameters:
110: + comm - the MPI communicator to use
111: . bs - block size, same meaning as `VecSetBlockSize()`
112: . n - local vector length, cannot be `PETSC_DECIDE`
113: . N - global vector length (or `PETSC_DECIDE` to have calculated)
114: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
115: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
117: Output Parameter:
118: . v - the vector
120: Notes:
121: See `VecCreateSeqCUDAWithArrays()` for further discussion, this routine shares identical
122: semantics.
124: Level: intermediate
126: .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArrays()`, `VecCreateMPIWithArray()`,
127: `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
128: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
129: @*/
130: PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
131: {
132: PetscFunctionBegin;
133: PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: // PetscClangLinter pragma disable: -fdoc-internal-linkage
138: /*@C
139: VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
140: user provides the device array space to store the vector values.
142: Collective
144: Input Parameters:
145: + comm - the MPI communicator to use
146: . bs - block size, same meaning as `VecSetBlockSize()`
147: . n - local vector length, cannot be `PETSC_DECIDE`
148: . N - global vector length (or `PETSC_DECIDE` to have calculated)
149: - gpuarray - the user provided GPU array to store the vector values
151: Output Parameter:
152: . v - the vector
154: Notes:
155: See `VecCreateSeqCUDAWithArray()` for further discussion, this routine shares identical
156: semantics.
158: Level: intermediate
160: .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArray()`, `VecCreateMPIWithArray()`,
161: `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
162: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
163: @*/
164: PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
165: {
166: PetscFunctionBegin;
167: PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
168: PetscFunctionReturn(PETSC_SUCCESS);
169: }