Actual source code: petscdevice_hip.h

  1: #pragma once

  3: #include <petscdevice.h>
  4: #include <petscpkg_version.h>

  6: #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
  7:   #define PETSC_USING_HCC 1
  8: #endif

 10: #if PetscDefined(HAVE_HIP)
 11:   #include <hip/hip_runtime.h>

 13:   #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
 14:     #include <hipblas/hipblas.h>
 15:     #include <hipsparse/hipsparse.h>
 16:   #else
 17:     #include <hipblas.h>
 18:     #include <hipsparse.h>
 19:   #endif

 21:   #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
 22:     #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
 23:   #endif

 25:   #if defined(__HIP_PLATFORM_NVCC__)
 26:     #include <cusolverDn.h>
 27:   #else // __HIP_PLATFORM_HCC__
 28:     #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
 29:       #include <hipsolver/hipsolver.h>
 30:     #else
 31:       #include <hipsolver.h>
 32:     #endif
 33:   #endif                       // __HIP_PLATFORM_NVCC__
 34:   #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex

 36:   // REMOVE ME
 37:   #define WaitForHIP() hipDeviceSynchronize()

 39: /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
 40: PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t);     /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
 41: PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
 42: PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */

 44:   #define PetscCallHIP(...) \
 45:     do { \
 46:       const hipError_t _p_hip_err__ = __VA_ARGS__; \
 47:       if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
 48:         const char *name  = hipGetErrorName(_p_hip_err__); \
 49:         const char *descr = hipGetErrorString(_p_hip_err__); \
 50:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
 51:       } \
 52:     } while (0)
 53:   #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)

 55:   #define PetscHIPCheckLaunch \
 56:     do { \
 57:       /* Check synchronous errors, i.e. pre-launch */ \
 58:       PetscCallHIP(hipGetLastError()); \
 59:       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
 60:       PetscCallHIP(hipDeviceSynchronize()); \
 61:     } while (0)

 63:   #define PetscCallHIPBLAS(...) \
 64:     do { \
 65:       const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
 66:       if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
 67:         const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
 68:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
 69:       } \
 70:     } while (0)
 71:   #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)

 73:   #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
 74:     /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
 75:     #define PetscCallHIPSPARSE(...) \
 76:       do { \
 77:         const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
 78:         if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
 79:           const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
 80:           PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
 81:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
 82:         } \
 83:       } while (0)
 84:     #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)

 86:     #define PetscCallHIPSOLVER(...) \
 87:       do { \
 88:         const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
 89:         if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
 90:           const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
 91:           if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
 92:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
 93:                     "hipSolver error %d (%s). " \
 94:                     "This indicates the GPU may have run out resources", \
 95:                     (PetscErrorCode)_p_hipsolver_stat__, name); \
 96:           } else { \
 97:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
 98:           } \
 99:         } \
100:       } while (0)
101:     #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)

103:   #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
104:     /* hipSolver does not exist yet so we work around it
105:   rocSOLVER users rocBLAS for the handle
106:   * */
107:     #if defined(__HIP_PLATFORM_NVCC__)
108:       #include <cusolverDn.h>
109: typedef cusolverDnHandle_t hipsolverHandle_t;
110: typedef cusolverStatus_t   hipsolverStatus_t;

112: /* Alias hipsolverDestroy to cusolverDnDestroy */
113: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
114: {
115:   return cusolverDnDestroy(hipsolverhandle);
116: }

118: /* Alias hipsolverCreate to cusolverDnCreate */
119: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
120: {
121:   return cusolverDnCreate(hipsolverhandle);
122: }

124: /* Alias hipsolverGetStream to cusolverDnGetStream */
125: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
126: {
127:   return cusolverDnGetStream(handle, stream);
128: }

130: /* Alias hipsolverSetStream to cusolverDnSetStream */
131: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
132: {
133:   return cusolveDnSetStream(handle, stream);
134: }
135:     #else /* __HIP_PLATFORM_HCC__ */
136:       #include <rocsolver.h>
137:       #include <rocblas.h>
138: typedef rocblas_handle hipsolverHandle_t;
139: typedef rocblas_status hipsolverStatus_t;

141: /* Alias hipsolverDestroy to rocblas_destroy_handle */
142: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
143: {
144:   return rocblas_destroy_handle(hipsolverhandle);
145: }

147: /* Alias hipsolverCreate to rocblas_destroy_handle */
148: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
149: {
150:   return rocblas_create_handle(hipsolverhandle);
151: }

153: // Alias hipsolverGetStream to rocblas_get_stream
154: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
155: {
156:   return rocblas_get_stream(handle, stream);
157: }

159: // Alias hipsolverSetStream to rocblas_set_stream
160: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
161: {
162:   return rocblas_set_stream(handle, stream);
163: }
164:     #endif // __HIP_PLATFORM_NVCC__
165:   #endif   /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
166: // REMOVE ME
167: PETSC_EXTERN hipStream_t    PetscDefaultHipStream; // The default stream used by PETSc
168: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
169: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
170: PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);

172: #endif // PETSC_HAVE_HIP

174: // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
175: // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
176: // would already be defined, but they would be empty since we cannot be using NVCC at the same
177: // time.
178: #if PetscDefined(USING_HCC)
179:   #undef PETSC_HOST_DECL
180:   #undef PETSC_DEVICE_DECL
181:   #undef PETSC_KERNEL_DECL
182:   #undef PETSC_SHAREDMEM_DECL
183:   #undef PETSC_FORCEINLINE
184:   #undef PETSC_CONSTMEM_DECL

186:   #define PETSC_HOST_DECL      __host__
187:   #define PETSC_DEVICE_DECL    __device__
188:   #define PETSC_KERNEL_DECL    __global__
189:   #define PETSC_SHAREDMEM_DECL __shared__
190:   #define PETSC_FORCEINLINE    __forceinline__
191:   #define PETSC_CONSTMEM_DECL  __constant__
192: #endif

194: #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
195:   #define PETSC_HOST_DECL
196:   #define PETSC_DEVICE_DECL
197:   #define PETSC_KERNEL_DECL
198:   #define PETSC_SHAREDMEM_DECL
199:   #define PETSC_FORCEINLINE inline
200:   #define PETSC_CONSTMEM_DECL
201: #endif

203: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
204:   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
205:   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
206:   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
207:   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
208: #endif