Actual source code: vecseqcupm.cu
1: #include "../vecseqcupm.hpp" /*I <petscvec.h> I*/
2: #include "../vecseqcupm_impl.hpp"
4: using namespace ::Petsc::vec::cupm;
5: using ::Petsc::device::cupm::DeviceType;
7: template class impl::VecSeq_CUPM<DeviceType::CUDA>;
9: static constexpr auto VecSeq_CUDA = impl::VecSeq_CUPM<DeviceType::CUDA>{};
/*MC
  VECSEQCUDA - VECSEQCUDA = "seqcuda" - The basic sequential vector, modified to use CUDA

  Options Database Key:
. -vec_type seqcuda - sets the vector type to `VECSEQCUDA` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQ`,
          `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECCUDA`, `VECHIP`, `VECMPICUDA`, `VECMPIHIP`, `VECSEQHIP`
M*/
23: PetscErrorCode VecCreate_SeqCUDA(Vec v)
24: {
25: PetscFunctionBegin;
26: PetscCall(VecSeq_CUDA.Create(v));
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: PetscErrorCode VecConvert_Seq_SeqCUDA_inplace(Vec v)
31: {
32: PetscFunctionBegin;
33: PetscCall(VecSeq_CUDA.Convert_IMPL_IMPLCUPM(v));
34: PetscFunctionReturn(PETSC_SUCCESS);
35: }
37: // PetscClangLinter pragma disable: -fdoc-internal-linkage
38: /*@
39: VecCreateSeqCUDA - Creates a standard, sequential, array-style vector.
41: Collective, Possibly Synchronous
43: Input Parameters:
44: + comm - the communicator, must be `PETSC_COMM_SELF`
45: - n - the vector length
47: Output Parameter:
48: . v - the vector
50: Level: intermediate
52: Notes:
53: Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
54: existing vector.
56: This function may initialize `PetscDevice`, which may incur a device synchronization.
58: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqCUDAWithArray()`,
59: `VecCreateMPI()`, `VecCreateMPICUDA()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
60: @*/
61: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm, PetscInt n, Vec *v)
62: {
63: PetscFunctionBegin;
64: PetscCall(VecCreateSeqCUPMAsync<DeviceType::CUDA>(comm, n, v));
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
68: // PetscClangLinter pragma disable: -fdoc-internal-linkage
69: /*@C
70: VecCreateSeqCUDAWithArrays - Creates a sequential, array-style vector using CUDA, where the
71: user provides the complete array space to store the vector values.
73: Collective, Possibly Synchronous
75: Input Parameters:
76: + comm - the communicator, must be `PETSC_COMM_SELF`
77: . bs - the block size
78: . n - the local vector length
79: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
80: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
82: Output Parameter:
83: . v - the vector
85: Level: intermediate
87: Notes:
88: If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
89: SET the array for storing the vector values. Otherwise, the array must be allocated on the
90: device.
92: If both cpuarray and gpuarray are provided, the provided arrays must have identical
93: values.
95: The arrays are NOT freed when the vector is destroyed via `VecDestroy()`. The user must free
96: them themselves, but not until the vector is destroyed.
98: This function may initialize `PetscDevice`, which may incur a device synchronization.
100: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeqWithArray()`, `VecCreateSeqCUDA()`,
101: `VecCreateSeqCUDAWithArray()`, `VecCreateMPICUDA()`, `VecCreateMPICUDAWithArray()`,
102: `VecCreateMPICUDAWithArrays()`, `VecCUDAPlaceArray()`
103: C@*/
104: PetscErrorCode VecCreateSeqCUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
105: {
106: PetscFunctionBegin;
107: PetscCall(VecCreateSeqCUPMWithArraysAsync<DeviceType::CUDA>(comm, bs, n, cpuarray, gpuarray, v));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: // PetscClangLinter pragma disable: -fdoc-internal-linkage
112: /*@C
113: VecCreateSeqCUDAWithArray - Creates a sequential, array-style vector using CUDA, where the
114: user provides the device array space to store the vector values.
116: Collective, Possibly Synchronous
118: Input Parameters:
119: + comm - the communicator, must be `PETSC_COMM_SELF`
120: . bs - the block size
121: . n - the vector length
122: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
124: Output Parameter:
125: . v - the vector
127: Level: intermediate
129: Notes:
130: If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
131: SET the array for storing the vector values. Otherwise, the array must be allocated on the
132: device.
134: The array is NOT freed when the vector is destroyed via `VecDestroy()`. The user must free the
135: array themselves, but not until the vector is destroyed.
137: Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
138: existing vector.
140: This function may initialize `PetscDevice`, which may incur a device synchronization.
142: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqWithArray()`,
143: `VecCreateMPIWithArray()`, `VecCreateSeqCUDA()`, `VecCreateMPICUDAWithArray()`, `VecCUDAPlaceArray()`,
144: `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
145: @*/
146: PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar gpuarray[], Vec *v)
147: {
148: PetscFunctionBegin;
149: PetscCall(VecCreateSeqCUDAWithArrays(comm, bs, n, nullptr, gpuarray, v));
150: PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: // PetscClangLinter pragma disable: -fdoc-internal-linkage
154: /*@C
155: VecCUDAGetArray - Provides access to the device buffer inside a vector
157: Logically Collective; Asynchronous; No Fortran Support
159: Input Parameter:
160: . v - the vector
162: Output Parameter:
163: . a - the device buffer
165: Level: intermediate
167: Notes:
168: This routine has semantics similar to `VecGetArray()`; the returned buffer points to a
169: consistent view of the vector data. This may involve copying data from the host to the device
170: if the data on the device is out of date. It is also assumed that the returned buffer is
171: immediately modified, marking the host data out of date. This is similar to intent(inout) in
172: Fortran.
174: If the user does require strong memory guarantees, they are encouraged to use
175: `VecCUDAGetArrayRead()` and/or `VecCUDAGetArrayWrite()` instead.
177: The user must call `VecCUDARestoreArray()` when they are finished using the array.
179: Developer Note:
180: If the device memory hasn't been allocated previously it will be allocated as part of this
181: routine.
183: .seealso: [](ch_vectors), `VecCUDARestoreArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
184: `VecGetArrayRead()`, `VecGetArrayWrite()`
185: @*/
186: PetscErrorCode VecCUDAGetArray(Vec v, PetscScalar **a)
187: {
188: PetscFunctionBegin;
189: PetscCall(VecCUPMGetArrayAsync<DeviceType::CUDA>(v, a));
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: // PetscClangLinter pragma disable: -fdoc-internal-linkage
194: /*@C
195: VecCUDARestoreArray - Restore a device buffer previously acquired with `VecCUDAGetArray()`.
197: NotCollective; Asynchronous; No Fortran Support
199: Input Parameters:
200: + v - the vector
201: - a - the device buffer
203: Level: intermediate
205: Note:
206: The restored pointer is invalid after this function returns. This function also marks the
207: host data as out of date. Subsequent access to the vector data on the host side via
208: `VecGetArray()` will incur a (synchronous) data transfer.
210: .seealso: [](ch_vectors), `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
211: `VecRestoreArray()`, `VecGetArrayRead()`
212: @*/
213: PetscErrorCode VecCUDARestoreArray(Vec v, PetscScalar **a)
214: {
215: PetscFunctionBegin;
216: PetscCall(VecCUPMRestoreArrayAsync<DeviceType::CUDA>(v, a));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: // PetscClangLinter pragma disable: -fdoc-internal-linkage
221: /*@C
222: VecCUDAGetArrayRead - Provides read access to the CUDA buffer inside a vector.
224: Not Collective; Asynchronous; No Fortran Support
226: Input Parameter:
227: . v - the vector
229: Output Parameter:
230: . a - the CUDA pointer.
232: Level: intermediate
234: Notes:
235: See `VecCUDAGetArray()` for data movement semantics of this function.
237: This function assumes that the user will not modify the vector data. This is analgogous to
238: intent(in) in Fortran.
240: The device pointer must be restored by calling `VecCUDARestoreArrayRead()`. If the data on the
241: host side was previously up to date it will remain so, i.e. data on both the device and the
242: host is up to date. Accessing data on the host side does not incur a device to host data
243: transfer.
245: .seealso: [](ch_vectors), `VecCUDARestoreArrayRead()`, `VecCUDAGetArray()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
246: `VecGetArrayRead()`
247: @*/
248: PetscErrorCode VecCUDAGetArrayRead(Vec v, const PetscScalar **a)
249: {
250: PetscFunctionBegin;
251: PetscCall(VecCUPMGetArrayReadAsync<DeviceType::CUDA>(v, a));
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: // PetscClangLinter pragma disable: -fdoc-internal-linkage
256: /*@C
257: VecCUDARestoreArrayRead - Restore a CUDA device pointer previously acquired with
258: `VecCUDAGetArrayRead()`.
260: Not Collective; Asynchronous; No Fortran Support
262: Input Parameters:
263: + v - the vector
264: - a - the CUDA device pointer
266: Level: intermediate
268: Note:
269: This routine does not modify the corresponding array on the host in any way. The pointer is
270: invalid after this function returns.
272: .seealso: [](ch_vectors), `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecGetArray()`,
273: `VecRestoreArray()`, `VecGetArrayRead()`
274: @*/
275: PetscErrorCode VecCUDARestoreArrayRead(Vec v, const PetscScalar **a)
276: {
277: PetscFunctionBegin;
278: PetscCall(VecCUPMRestoreArrayReadAsync<DeviceType::CUDA>(v, a));
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: // PetscClangLinter pragma disable: -fdoc-internal-linkage
283: /*@C
284: VecCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a vector.
286: Logically Collective; Asynchronous; No Fortran Support
288: Input Parameter:
289: . v - the vector
291: Output Parameter:
292: . a - the CUDA pointer
294: Level: advanced
296: Notes:
297: The data pointed to by the device pointer is uninitialized. The user may not read from this
298: data. Furthermore, the entire array needs to be filled by the user to obtain well-defined
299: behaviour. The device memory will be allocated by this function if it hasn't been allocated
300: previously. This is analogous to intent(out) in Fortran.
302: The device pointer needs to be released with `VecCUDARestoreArrayWrite()`. When the pointer is
303: released the host data of the vector is marked as out of data. Subsequent access of the host
304: data with e.g. VecGetArray() incurs a device to host data transfer.
306: .seealso: [](ch_vectors), `VecCUDARestoreArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
307: `VecCUDAGetArrayWrite()`, `VecGetArray()`, `VecGetArrayRead()`
308: @*/
309: PetscErrorCode VecCUDAGetArrayWrite(Vec v, PetscScalar **a)
310: {
311: PetscFunctionBegin;
312: PetscCall(VecCUPMGetArrayWriteAsync<DeviceType::CUDA>(v, a));
313: PetscFunctionReturn(PETSC_SUCCESS);
314: }
316: // PetscClangLinter pragma disable: -fdoc-internal-linkage
317: /*@C
318: VecCUDARestoreArrayWrite - Restore a CUDA device pointer previously acquired with
319: `VecCUDAGetArrayWrite()`.
321: Logically Collective; Asynchronous; No Fortran Support
323: Input Parameters:
324: + v - the vector
325: - a - the CUDA device pointer. This pointer is invalid after `VecCUDARestoreArrayWrite()` returns.
327: Level: intermediate
329: Note:
330: Data on the host will be marked as out of date. Subsequent access of the data on the host
331: side e.g. with `VecGetArray()` will incur a device to host data transfer.
333: .seealso: [](ch_vectors), `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
334: `VecCUDAGetArrayWrite()`, `VecGetArray()`, `VecRestoreArray()`, `VecGetArrayRead()`
335: @*/
336: PetscErrorCode VecCUDARestoreArrayWrite(Vec v, PetscScalar **a)
337: {
338: PetscFunctionBegin;
339: PetscCall(VecCUPMRestoreArrayWriteAsync<DeviceType::CUDA>(v, a));
340: PetscFunctionReturn(PETSC_SUCCESS);
341: }
343: // PetscClangLinter pragma disable: -fdoc-internal-linkage
344: /*@C
345: VecCUDAPlaceArray - Allows one to replace the GPU array in a vector with a GPU array provided
346: by the user.
348: Logically Collective; Asynchronous; No Fortran Support
350: Input Parameters:
351: + vec - the vector
352: - array - the GPU array
354: Level: advanced
356: Notes:
357: This routine is useful to avoid copying an array into a vector, though you can return to the
358: original GPU array with a call to `VecCUDAResetArray()`.
360: It is not possible to use `VecCUDAPlaceArray()` and `VecPlaceArray()` at the same time on the
361: same vector.
363: `vec` does not take ownership of `array` in any way. The user must free `array` themselves
364: but be careful not to do so before the vector has either been destroyed, had its original
365: array restored with `VecCUDAResetArray()` or permanently replaced with
366: `VecCUDAReplaceArray()`.
368: .seealso: [](ch_vectors), `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`,
369: `VecResetArray()`, `VecCUDAResetArray()`, `VecCUDAReplaceArray()`
370: @*/
371: PetscErrorCode VecCUDAPlaceArray(Vec vin, const PetscScalar a[])
372: {
373: PetscFunctionBegin;
374: PetscCall(VecCUPMPlaceArrayAsync<DeviceType::CUDA>(vin, a));
375: PetscFunctionReturn(PETSC_SUCCESS);
376: }
378: // PetscClangLinter pragma disable: -fdoc-internal-linkage
379: /*@C
380: VecCUDAReplaceArray - Permanently replace the GPU array in a vector with a GPU array provided
381: by the user.
383: Logically Collective; No Fortran Support
385: Input Parameters:
386: + vec - the vector
387: - array - the GPU array
389: Level: advanced
391: Notes:
392: This is useful to avoid copying a GPU array into a vector.
394: This frees the memory associated with the old GPU array. The vector takes ownership of the
395: passed array so it CANNOT be freed by the user. It will be freed when the vector is
396: destroyed.
398: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecPlaceArray()`, `VecResetArray()`,
399: `VecCUDAResetArray()`, `VecCUDAPlaceArray()`, `VecReplaceArray()`
400: @*/
401: PetscErrorCode VecCUDAReplaceArray(Vec vin, const PetscScalar a[])
402: {
403: PetscFunctionBegin;
404: PetscCall(VecCUPMReplaceArrayAsync<DeviceType::CUDA>(vin, a));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: // PetscClangLinter pragma disable: -fdoc-internal-linkage
409: /*@C
410: VecCUDAResetArray - Resets a vector to use its default memory.
412: Logically Collective; No Fortran Support
414: Input Parameters:
415: . vec - the vector
417: Level: advanced
419: Note:
420: Call this after the use of `VecCUDAPlaceArray()`.
422: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecPlaceArray()`,
423: `VecResetArray()`, `VecCUDAPlaceArray()`, `VecCUDAReplaceArray()`
424: @*/
425: PetscErrorCode VecCUDAResetArray(Vec vin)
426: {
427: PetscFunctionBegin;
428: PetscCall(VecCUPMResetArrayAsync<DeviceType::CUDA>(vin));
429: PetscFunctionReturn(PETSC_SUCCESS);
430: }