Actual source code: kernels.hpp

  1: #pragma once

  3: #include <petscdevice_cupm.h>

  5: namespace Petsc
  6: {

  8: namespace device
  9: {

 11: namespace cupm
 12: {

 14: namespace kernels
 15: {

 17: namespace util
 18: {

 20: template <typename SizeType, typename T>
 21: PETSC_DEVICE_INLINE_DECL static void grid_stride_1D(const SizeType size, T &&func) noexcept
 22: {
 23:   for (SizeType i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) func(i);
 24:   return;
 25: }

 27: } // namespace util

 29: } // namespace kernels

 31: namespace functors
 32: {

 34: template <typename T>
 35: class plus_equals {
 36: public:
 37:   using value_type = T;

 39:   PETSC_HOSTDEVICE_DECL constexpr explicit plus_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }

 41:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val + v_; }

 43: private:
 44:   value_type v_;
 45: };

 47: template <typename T>
 48: class times_equals {
 49: public:
 50:   using value_type = T;

 52:   PETSC_HOSTDEVICE_DECL constexpr explicit times_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }

 54:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val * v_; }

 56: private:
 57:   value_type v_;
 58: };

 60: template <typename T>
 61: class axpy {
 62: public:
 63:   using value_type = T;

 65:   PETSC_HOSTDEVICE_DECL constexpr explicit axpy(value_type v = value_type{}) noexcept : v_{std::move(v)} { }

 67:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &x, const value_type &y) const noexcept { return v_ * x + y; }

 69: private:
 70:   value_type v_;
 71: };

 73: namespace
 74: {

 76: template <typename T>
 77: PETSC_HOSTDEVICE_INLINE_DECL constexpr plus_equals<T> make_plus_equals(const T &v) noexcept
 78: {
 79:   return plus_equals<T>{v};
 80: }

 82: template <typename T>
 83: PETSC_HOSTDEVICE_INLINE_DECL constexpr times_equals<T> make_times_equals(const T &v) noexcept
 84: {
 85:   return times_equals<T>{v};
 86: }

 88: template <typename T>
 89: PETSC_HOSTDEVICE_INLINE_DECL constexpr axpy<T> make_axpy(const T &v) noexcept
 90: {
 91:   return axpy<T>{v};
 92: }

 94: } // anonymous namespace

 96: } // namespace functors

 98: } // namespace cupm

100: } // namespace device

102: } // namespace Petsc