Actual source code: kernels.hpp
  1: #pragma once
  3: #include <petscdevice_cupm.h>
  5: namespace Petsc
  6: {
  8: namespace device
  9: {
 11: namespace cupm
 12: {
 14: namespace kernels
 15: {
 17: namespace util
 18: {
 20: template <typename SizeType, typename T>
 21: PETSC_DEVICE_INLINE_DECL static void grid_stride_1D(const SizeType size, T &&func) noexcept
 22: {
 23:   for (SizeType i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) func(i);
 24:   return;
 25: }
 27: } // namespace util
 29: } // namespace kernels
 31: namespace functors
 32: {
 34: template <typename T>
 35: class plus_equals {
 36: public:
 37:   using value_type = T;
 39:   PETSC_HOSTDEVICE_DECL constexpr explicit plus_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
 41:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val + v_; }
 43: private:
 44:   value_type v_;
 45: };
 47: template <typename T>
 48: class times_equals {
 49: public:
 50:   using value_type = T;
 52:   PETSC_HOSTDEVICE_DECL constexpr explicit times_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
 54:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val * v_; }
 56: private:
 57:   value_type v_;
 58: };
 60: template <typename T>
 61: class axpy {
 62: public:
 63:   using value_type = T;
 65:   PETSC_HOSTDEVICE_DECL constexpr explicit axpy(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
 67:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &x, const value_type &y) const noexcept { return v_ * x + y; }
 69: private:
 70:   value_type v_;
 71: };
 73: namespace
 74: {
 76: template <typename T>
 77: PETSC_HOSTDEVICE_INLINE_DECL constexpr plus_equals<T> make_plus_equals(const T &v) noexcept
 78: {
 79:   return plus_equals<T>{v};
 80: }
 82: template <typename T>
 83: PETSC_HOSTDEVICE_INLINE_DECL constexpr times_equals<T> make_times_equals(const T &v) noexcept
 84: {
 85:   return times_equals<T>{v};
 86: }
 88: template <typename T>
 89: PETSC_HOSTDEVICE_INLINE_DECL constexpr axpy<T> make_axpy(const T &v) noexcept
 90: {
 91:   return axpy<T>{v};
 92: }
 94: } // anonymous namespace
 96: } // namespace functors
 98: } // namespace cupm
100: } // namespace device
102: } // namespace Petsc