Actual source code: hypre1.cu
1: #include <petsc/private/petschypre.h>
2: #include <petscdevice_cuda.h>
3: #include <../src/mat/impls/hypre/mhypre_kernels.hpp>
4: #include <../src/mat/impls/hypre/mhypre.h>
6: PetscErrorCode MatZeroRows_CUDA(PetscInt n, const PetscInt rows[], const HYPRE_Int i[], const HYPRE_Int j[], HYPRE_Complex a[], HYPRE_Complex diag)
7: {
8: const PetscInt blkDimX = 16, blkDimY = 32;
9: PetscInt gridDimX = (n + blkDimX - 1) / blkDimX;
10: cudaStream_t stream;
12: PetscFunctionBegin;
13: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
14: PetscCall(PetscGetCurrentCUDAStream(&stream));
15: ZeroRows<<<dim3(gridDimX, 1), dim3(blkDimX, blkDimY), 0, stream>>>(n, rows, i, j, a, diag);
16: PetscCallCUDA(cudaGetLastError());
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
20: PetscErrorCode PetscHypreIntCastArray_CUDA(PetscInt n, const PetscInt *a, HYPRE_Int *b)
21: {
22: cudaStream_t stream;
24: PetscFunctionBegin;
25: if (n) {
26: PetscCall(PetscGetCurrentCUDAStream(&stream));
27: CastArray<<<(n + 255) / 256, 256, 0, stream>>>(n, a, b);
28: PetscCallCUDA(cudaGetLastError());
29: }
30: PetscFunctionReturn(PETSC_SUCCESS);
31: }
33: PetscErrorCode MatHypreDeviceMalloc_CUDA(size_t size, void **ptr)
34: {
35: PetscFunctionBegin;
36: if (size) PetscCallCUDA(cudaMalloc(ptr, size));
37: else *ptr = NULL;
38: PetscFunctionReturn(PETSC_SUCCESS);
39: }
41: PetscErrorCode MatHypreDeviceFree_CUDA(void *a)
42: {
43: PetscFunctionBegin;
44: PetscCallCUDA(cudaFree(a));
45: PetscFunctionReturn(PETSC_SUCCESS);
46: }