Actual source code: kernels.hpp
1: #pragma once
3: #include <petscdevice_cupm.h>
5: namespace Petsc
6: {
8: namespace device
9: {
11: namespace cupm
12: {
14: namespace kernels
15: {
17: namespace util
18: {
20: template <typename SizeType, typename T>
21: PETSC_DEVICE_INLINE_DECL static void grid_stride_1D(const SizeType size, T &&func) noexcept
22: {
23: for (SizeType i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x) func(i);
24: return;
25: }
27: } // namespace util
29: } // namespace kernels
31: namespace functors
32: {
34: template <typename T>
35: class plus_equals {
36: public:
37: using value_type = T;
39: PETSC_HOSTDEVICE_DECL constexpr explicit plus_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
41: PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val + v_; }
43: private:
44: value_type v_;
45: };
47: template <typename T>
48: class times_equals {
49: public:
50: using value_type = T;
52: PETSC_HOSTDEVICE_DECL constexpr explicit times_equals(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
54: PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &val) const noexcept { return val * v_; }
56: private:
57: value_type v_;
58: };
60: template <typename T>
61: class axpy {
62: public:
63: using value_type = T;
65: PETSC_HOSTDEVICE_DECL constexpr explicit axpy(value_type v = value_type{}) noexcept : v_{std::move(v)} { }
67: PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr value_type operator()(const value_type &x, const value_type &y) const noexcept { return v_ * x + y; }
69: private:
70: value_type v_;
71: };
73: namespace
74: {
76: template <typename T>
77: PETSC_HOSTDEVICE_INLINE_DECL constexpr plus_equals<T> make_plus_equals(const T &v) noexcept
78: {
79: return plus_equals<T>{v};
80: }
82: template <typename T>
83: PETSC_HOSTDEVICE_INLINE_DECL constexpr times_equals<T> make_times_equals(const T &v) noexcept
84: {
85: return times_equals<T>{v};
86: }
88: template <typename T>
89: PETSC_HOSTDEVICE_INLINE_DECL constexpr axpy<T> make_axpy(const T &v) noexcept
90: {
91: return axpy<T>{v};
92: }
94: } // anonymous namespace
96: } // namespace functors
98: } // namespace cupm
100: } // namespace device
102: } // namespace Petsc