Actual source code: petscdevice_cuda.h
1: #pragma once
3: #include <petscdevice.h>
4: #include <petscpkg_version.h>
6: #if defined(__NVCC__) || defined(__CUDACC__)
7: #define PETSC_USING_NVCC 1
8: #endif
10: #if PetscDefined(HAVE_CUDA)
11: PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wdeprecated-declarations")
12: #include <cuda.h>
13: #include <cuda_runtime.h>
14: #include <cublas_v2.h>
15: #include <cusolverDn.h>
16: #include <cusolverSp.h>
17: #include <cufft.h>
18: PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
20: /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
21: PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
22: PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
23: PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);
25: /* REMOVE ME */
26: #define WaitForCUDA() cudaDeviceSynchronize()
28: /* CUDART_VERSION = 1000 x major + 10 x minor version */
30: /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
31: #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
32: #define PetscCallCUDAVoid(...) \
33: do { \
34: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
35: PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
36: } while (0)
38: #define PetscCallCUDA(...) \
39: do { \
40: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
41: PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
42: } while (0)
43: #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
44: #define PetscCallCUDA(...) \
45: do { \
46: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
47: PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
48: } while (0)
50: #define PetscCallCUDAVoid(...) \
51: do { \
52: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
53: PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
54: } while (0)
55: #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
56: #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
58: #define PetscCUDACheckLaunch \
59: do { \
60: /* Check synchronous errors, i.e. pre-launch */ \
61: PetscCallCUDA(cudaGetLastError()); \
62: /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
63: PetscCallCUDA(cudaDeviceSynchronize()); \
64: } while (0)
66: #define PetscCallCUBLAS(...) \
67: do { \
68: const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
69: if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
70: const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \
71: if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
72: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
73: "cuBLAS error %d (%s). " \
74: "Reports not initialized or alloc failed; " \
75: "this indicates the GPU may have run out resources", \
76: (PetscErrorCode)_p_cublas_stat__, name); \
77: } else { \
78: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \
79: } \
80: } \
81: } while (0)
82: #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
84: #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
85: #define PetscCallCUSPARSE(...) \
86: do { \
87: const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
88: if (PetscUnlikely(_p_cusparse_stat__)) { \
89: const char *name = cusparseGetErrorName(_p_cusparse_stat__); \
90: const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \
91: PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
92: "cuSPARSE errorcode %d (%s) : %s.; " \
93: "this indicates the GPU has run out resources", \
94: (int)_p_cusparse_stat__, name, descr); \
95: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \
96: } \
97: } while (0)
98: #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
99: #define PetscCallCUSPARSE(...) \
100: do { \
101: const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
102: PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \
103: } while (0)
104: #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
105: #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
107: #define PetscCallCUSOLVER(...) \
108: do { \
109: const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \
110: if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
111: const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
112: if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
113: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
114: "cuSolver error %d (%s). " \
115: "This indicates the GPU may have run out resources", \
116: (PetscErrorCode)_p_cusolver_stat__, name); \
117: } else { \
118: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \
119: } \
120: } \
121: } while (0)
122: #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
124: #define PetscCallCUFFT(...) \
125: do { \
126: const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \
127: if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
128: const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
129: if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
130: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
131: "cuFFT error %d (%s). " \
132: "Reports not initialized or alloc failed; " \
133: "this indicates the GPU has run out resources", \
134: (PetscErrorCode)_p_cufft_stat__, name); \
135: } else { \
136: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
137: } \
138: } \
139: } while (0)
140: #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
142: #define PetscCallCURAND(...) \
143: do { \
144: const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
145: if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
146: if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
147: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
148: "cuRAND error %d. " \
149: "Reports not initialized or alloc failed; " \
150: "this indicates the GPU has run out resources", \
151: (PetscErrorCode)_p_curand_stat__); \
152: } else { \
153: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \
154: } \
155: } \
156: } while (0)
157: #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
159: PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; // The default stream used by PETSc
160: PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
161: PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);
162: PETSC_EXTERN PetscErrorCode PetscGetCurrentCUDAStream(cudaStream_t *);
164: #endif // PETSC_HAVE_CUDA
166: // these can also be defined in petscdevice_hip.h so we undef and define them *only* if the
167: // current compiler is NVCC. In this case if petscdevice_hip.h is included first, the macros
168: // would already be defined, but they would be empty since we cannot be using HCC at the same
169: // time.
170: #if PetscDefined(USING_NVCC)
171: #undef PETSC_HOST_DECL
172: #undef PETSC_DEVICE_DECL
173: #undef PETSC_KERNEL_DECL
174: #undef PETSC_SHAREDMEM_DECL
175: #undef PETSC_FORCEINLINE
176: #undef PETSC_CONSTMEM_DECL
178: #define PETSC_HOST_DECL __host__
179: #define PETSC_DEVICE_DECL __device__
180: #define PETSC_KERNEL_DECL __global__
181: #define PETSC_SHAREDMEM_DECL __shared__
182: #define PETSC_FORCEINLINE __forceinline__
183: #define PETSC_CONSTMEM_DECL __constant__
184: #endif
186: #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
187: #define PETSC_HOST_DECL
188: #define PETSC_DEVICE_DECL
189: #define PETSC_KERNEL_DECL
190: #define PETSC_SHAREDMEM_DECL
191: #define PETSC_FORCEINLINE inline
192: #define PETSC_CONSTMEM_DECL
193: #endif
195: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
196: #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
197: #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
198: #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
199: #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
200: #endif