Actual source code: petscdevice_hip.h
1: #pragma once
3: #include <petscdevice.h>
4: #include <petscpkg_version.h>
6: #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
7: #define PETSC_USING_HCC 1
8: #endif
10: #if PetscDefined(HAVE_HIP)
11: #include <hip/hip_runtime.h>
13: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
15: // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like
16: // error: no matching function for call to 'cupmBlasXdot'.
17: // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example.
18: // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc.
19: // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024).
20: //
21: // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes
22: // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations
23: #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0)
24: #define HIPBLAS_V2
25: #else
26: #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX
27: #endif
28: #include <hipblas/hipblas.h>
29: #include <hipsparse/hipsparse.h>
30: #else
31: #include <hipblas.h>
32: #include <hipsparse.h>
33: #endif
35: #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
36: #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
37: #endif
39: #if defined(__HIP_PLATFORM_NVCC__)
40: #include <cusolverDn.h>
41: #else // __HIP_PLATFORM_HCC__
42: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
43: #include <hipsolver/hipsolver.h>
44: #else
45: #include <hipsolver.h>
46: #endif
47: #endif // __HIP_PLATFORM_NVCC__
48: #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
50: // REMOVE ME
51: #define WaitForHIP() hipDeviceSynchronize()
53: /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
54: PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
55: PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
56: PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
58: #define PetscCallHIP(...) \
59: do { \
60: const hipError_t _p_hip_err__ = __VA_ARGS__; \
61: if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
62: const char *name = hipGetErrorName(_p_hip_err__); \
63: const char *descr = hipGetErrorString(_p_hip_err__); \
64: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
65: } \
66: } while (0)
67: #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
69: #define PetscHIPCheckLaunch \
70: do { \
71: /* Check synchronous errors, i.e. pre-launch */ \
72: PetscCallHIP(hipGetLastError()); \
73: /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
74: PetscCallHIP(hipDeviceSynchronize()); \
75: } while (0)
77: #define PetscCallHIPBLAS(...) \
78: do { \
79: const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
80: if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
81: const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
82: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
83: } \
84: } while (0)
85: #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
87: #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
88: /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
89: #define PetscCallHIPSPARSE(...) \
90: do { \
91: const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
92: if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
93: const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
94: PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
95: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
96: } \
97: } while (0)
98: #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
100: #define PetscCallHIPSOLVER(...) \
101: do { \
102: const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
103: if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
104: const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
105: if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
106: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
107: "hipSolver error %d (%s). " \
108: "This indicates the GPU may have run out resources", \
109: (PetscErrorCode)_p_hipsolver_stat__, name); \
110: } else { \
111: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
112: } \
113: } \
114: } while (0)
115: #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
117: #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
118: /* hipSolver does not exist yet so we work around it
119: rocSOLVER users rocBLAS for the handle
120: * */
121: #if defined(__HIP_PLATFORM_NVCC__)
122: #include <cusolverDn.h>
123: typedef cusolverDnHandle_t hipsolverHandle_t;
124: typedef cusolverStatus_t hipsolverStatus_t;
126: /* Alias hipsolverDestroy to cusolverDnDestroy */
127: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
128: {
129: return cusolverDnDestroy(hipsolverhandle);
130: }
132: /* Alias hipsolverCreate to cusolverDnCreate */
133: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
134: {
135: return cusolverDnCreate(hipsolverhandle);
136: }
138: /* Alias hipsolverGetStream to cusolverDnGetStream */
139: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
140: {
141: return cusolverDnGetStream(handle, stream);
142: }
144: /* Alias hipsolverSetStream to cusolverDnSetStream */
145: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
146: {
147: return cusolveDnSetStream(handle, stream);
148: }
149: #else /* __HIP_PLATFORM_HCC__ */
150: #include <rocsolver.h>
151: #include <rocblas.h>
152: typedef rocblas_handle hipsolverHandle_t;
153: typedef rocblas_status hipsolverStatus_t;
155: /* Alias hipsolverDestroy to rocblas_destroy_handle */
156: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
157: {
158: return rocblas_destroy_handle(hipsolverhandle);
159: }
161: /* Alias hipsolverCreate to rocblas_destroy_handle */
162: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
163: {
164: return rocblas_create_handle(hipsolverhandle);
165: }
167: // Alias hipsolverGetStream to rocblas_get_stream
168: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
169: {
170: return rocblas_get_stream(handle, stream);
171: }
173: // Alias hipsolverSetStream to rocblas_set_stream
174: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
175: {
176: return rocblas_set_stream(handle, stream);
177: }
178: #endif // __HIP_PLATFORM_NVCC__
179: #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
180: // REMOVE ME
181: PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc
182: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
183: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
184: PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
186: #endif // PETSC_HAVE_HIP
188: // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
189: // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
190: // would already be defined, but they would be empty since we cannot be using NVCC at the same
191: // time.
192: #if PetscDefined(USING_HCC)
193: #undef PETSC_HOST_DECL
194: #undef PETSC_DEVICE_DECL
195: #undef PETSC_KERNEL_DECL
196: #undef PETSC_SHAREDMEM_DECL
197: #undef PETSC_FORCEINLINE
198: #undef PETSC_CONSTMEM_DECL
200: #define PETSC_HOST_DECL __host__
201: #define PETSC_DEVICE_DECL __device__
202: #define PETSC_KERNEL_DECL __global__
203: #define PETSC_SHAREDMEM_DECL __shared__
204: #define PETSC_FORCEINLINE __forceinline__
205: #define PETSC_CONSTMEM_DECL __constant__
206: #endif
208: #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
209: #define PETSC_HOST_DECL
210: #define PETSC_DEVICE_DECL
211: #define PETSC_KERNEL_DECL
212: #define PETSC_SHAREDMEM_DECL
213: #define PETSC_FORCEINLINE inline
214: #define PETSC_CONSTMEM_DECL
215: #endif
217: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
218: #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
219: #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
220: #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
221: #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
222: #endif