Actual source code: hypre1.cu

  1: #include <petsc/private/petschypre.h>
  2: #include <petscdevice_cuda.h>
  3: #include <../src/mat/impls/hypre/mhypre_kernels.hpp>
  4: #include <../src/mat/impls/hypre/mhypre.h>

  6: PetscErrorCode MatZeroRows_CUDA(PetscInt n, const PetscInt rows[], const HYPRE_Int i[], const HYPRE_Int j[], HYPRE_Complex a[], HYPRE_Complex diag)
  7: {
  8:   const PetscInt blkDimX = 16, blkDimY = 32;
  9:   PetscInt       gridDimX = (n + blkDimX - 1) / blkDimX;
 10:   cudaStream_t   stream;

 12:   PetscFunctionBegin;
 13:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
 14:   PetscCall(PetscGetCurrentCUDAStream(&stream));
 15:   ZeroRows<<<dim3(gridDimX, 1), dim3(blkDimX, blkDimY), 0, stream>>>(n, rows, i, j, a, diag);
 16:   PetscCallCUDA(cudaGetLastError());
 17:   PetscFunctionReturn(PETSC_SUCCESS);
 18: }

 20: PetscErrorCode PetscHypreIntCastArray_CUDA(PetscInt n, const PetscInt *a, HYPRE_Int *b)
 21: {
 22:   cudaStream_t stream;

 24:   PetscFunctionBegin;
 25:   if (n) {
 26:     PetscCall(PetscGetCurrentCUDAStream(&stream));
 27:     CastArray<<<(n + 255) / 256, 256, 0, stream>>>(n, a, b);
 28:     PetscCallCUDA(cudaGetLastError());
 29:   }
 30:   PetscFunctionReturn(PETSC_SUCCESS);
 31: }

 33: PetscErrorCode MatHypreDeviceMalloc_CUDA(size_t size, void **ptr)
 34: {
 35:   PetscFunctionBegin;
 36:   if (size) PetscCallCUDA(cudaMalloc(ptr, size));
 37:   else *ptr = NULL;
 38:   PetscFunctionReturn(PETSC_SUCCESS);
 39: }

 41: PetscErrorCode MatHypreDeviceFree_CUDA(void *a)
 42: {
 43:   PetscFunctionBegin;
 44:   PetscCallCUDA(cudaFree(a));
 45:   PetscFunctionReturn(PETSC_SUCCESS);
 46: }