Actual source code: matdensecupmimpl.h
1: #pragma once
3: #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
4: #include <petsc/private/matimpl.h>
6: #ifdef __cplusplus
7: #include <petsc/private/deviceimpl.h>
8: #include <petsc/private/cupmsolverinterface.hpp>
9: #include <petsc/private/cupmobject.hpp>
11: #include "../src/sys/objects/device/impls/cupm/cupmthrustutility.hpp"
12: #include "../src/sys/objects/device/impls/cupm/kernels.hpp"
14: #include <thrust/device_vector.h>
15: #include <thrust/device_ptr.h>
16: #include <thrust/iterator/counting_iterator.h>
17: #include <thrust/iterator/transform_iterator.h>
18: #include <thrust/iterator/permutation_iterator.h>
19: #include <thrust/transform.h>
20: #include <thrust/copy.h>
22: namespace Petsc
23: {
25: namespace vec
26: {
28: namespace cupm
29: {
31: namespace impl
32: {
34: template <device::cupm::DeviceType>
35: class VecSeq_CUPM;
36: template <device::cupm::DeviceType>
37: class VecMPI_CUPM;
39: } // namespace impl
41: } // namespace cupm
43: } // namespace vec
45: namespace mat
46: {
48: namespace cupm
49: {
51: namespace impl
52: {
54: // ==========================================================================================
55: // MatDense_CUPM_Base
56: //
57: // A base class to separate out the CRTP code from the common CUPM stuff (like the composed
58: // function names).
59: // ==========================================================================================
61: template <device::cupm::DeviceType T>
62: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM_Base : protected device::cupm::impl::CUPMObject<T> {
63: public:
64: PETSC_CUPMOBJECT_HEADER(T);
66: #define MatDenseCUPMComposedOpDecl(OP_NAME) \
67: PETSC_NODISCARD static constexpr const char *PetscConcat(MatDenseCUPM, OP_NAME)() noexcept \
68: { \
69: return T == device::cupm::DeviceType::CUDA ? PetscStringize(PetscConcat(MatDenseCUDA, OP_NAME)) : PetscStringize(PetscConcat(MatDenseHIP, OP_NAME)); \
70: }
72: // clang-format off
73: MatDenseCUPMComposedOpDecl(GetArray_C)
74: MatDenseCUPMComposedOpDecl(GetArrayRead_C)
75: MatDenseCUPMComposedOpDecl(GetArrayWrite_C)
76: MatDenseCUPMComposedOpDecl(RestoreArray_C)
77: MatDenseCUPMComposedOpDecl(RestoreArrayRead_C)
78: MatDenseCUPMComposedOpDecl(RestoreArrayWrite_C)
79: MatDenseCUPMComposedOpDecl(PlaceArray_C)
80: MatDenseCUPMComposedOpDecl(ReplaceArray_C)
81: MatDenseCUPMComposedOpDecl(ResetArray_C)
82: MatDenseCUPMComposedOpDecl(SetPreallocation_C)
83: // clang-format on
85: #undef MatDenseCUPMComposedOpDecl
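// Illustrative expansion (sketch only, not generated verbatim): for T == device::cupm::DeviceType::CUDA
// the macro above produces, for example,
//
//   static constexpr const char *MatDenseCUPMGetArray_C() noexcept { return "MatDenseCUDAGetArray_C"; }
//
// while the HIP instantiation returns "MatDenseHIPGetArray_C". These strings name the composed
// functions that are looked up via PetscObjectComposeFunction()/PetscUseMethod() further below.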
87: PETSC_NODISCARD static constexpr MatType MATSEQDENSECUPM() noexcept;
88: PETSC_NODISCARD static constexpr MatType MATMPIDENSECUPM() noexcept;
89: PETSC_NODISCARD static constexpr MatType MATDENSECUPM() noexcept;
90: PETSC_NODISCARD static constexpr MatSolverType MATSOLVERCUPM() noexcept;
91: };
93: // ==========================================================================================
94: // MatDense_CUPM_Base -- Public API
95: // ==========================================================================================
97: template <device::cupm::DeviceType T>
98: inline constexpr MatType MatDense_CUPM_Base<T>::MATSEQDENSECUPM() noexcept
99: {
100: return T == device::cupm::DeviceType::CUDA ? MATSEQDENSECUDA : MATSEQDENSEHIP;
101: }
103: template <device::cupm::DeviceType T>
104: inline constexpr MatType MatDense_CUPM_Base<T>::MATMPIDENSECUPM() noexcept
105: {
106: return T == device::cupm::DeviceType::CUDA ? MATMPIDENSECUDA : MATMPIDENSEHIP;
107: }
109: template <device::cupm::DeviceType T>
110: inline constexpr MatType MatDense_CUPM_Base<T>::MATDENSECUPM() noexcept
111: {
112: return T == device::cupm::DeviceType::CUDA ? MATDENSECUDA : MATDENSEHIP;
113: }
115: template <device::cupm::DeviceType T>
116: inline constexpr MatSolverType MatDense_CUPM_Base<T>::MATSOLVERCUPM() noexcept
117: {
118: return T == device::cupm::DeviceType::CUDA ? MATSOLVERCUDA : MATSOLVERHIP;
119: }
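// Usage sketch: the constexpr accessors above let device-agnostic code select the concrete type
// names at compile time, e.g. (assuming a CUDA instantiation and some Mat A)
//
//   using Base = MatDense_CUPM_Base<device::cupm::DeviceType::CUDA>;
//   PetscCall(MatSetType(A, Base::MATSEQDENSECUPM())); // same as MatSetType(A, MATSEQDENSECUDA)
//
// and the HIP instantiation of the identical line selects MATSEQDENSEHIP instead.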
121: #define MATDENSECUPM_BASE_HEADER(T) \
122: PETSC_CUPMOBJECT_HEADER(T); \
123: using VecSeq_CUPM = ::Petsc::vec::cupm::impl::VecSeq_CUPM<T>; \
124: using VecMPI_CUPM = ::Petsc::vec::cupm::impl::VecMPI_CUPM<T>; \
125: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSEQDENSECUPM; \
126: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATMPIDENSECUPM; \
127: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATDENSECUPM; \
128: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSOLVERCUPM; \
129: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C; \
130: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C; \
131: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C; \
132: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C; \
133: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C; \
134: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C; \
135: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C; \
136: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C; \
137: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C; \
138: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMSetPreallocation_C
140: // forward declare
141: template <device::cupm::DeviceType>
142: class MatDense_Seq_CUPM;
143: template <device::cupm::DeviceType>
144: class MatDense_MPI_CUPM;
146: // ==========================================================================================
147: // MatDense_CUPM
148: //
149: // The true "base" class for MatDenseCUPM. The reason MatDense_CUPM and MatDense_CUPM_Base
150: // exist is to separate out the CRTP code from the non-CRTP code so that the generic functions
151: // can be called via templates below.
152: // ==========================================================================================
154: template <device::cupm::DeviceType T, typename Derived>
155: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM : protected MatDense_CUPM_Base<T> {
156: private:
157: static PetscErrorCode CheckSaneSequentialMatSizes_(Mat) noexcept;
159: protected:
160: MATDENSECUPM_BASE_HEADER(T);
162: template <PetscMemType, PetscMemoryAccessMode>
163: class MatrixArray;
165: // Cast the Mat to its host struct, i.e. return the result of (Mat_SeqDense *)m->data
166: template <typename U = Derived>
167: PETSC_NODISCARD static constexpr auto MatIMPLCast(Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(U::MatIMPLCast_(m))
168: PETSC_NODISCARD static constexpr MatType MATIMPLCUPM() noexcept;
170: static PetscErrorCode CreateIMPLDenseCUPM(MPI_Comm, PetscInt, PetscInt, PetscInt, PetscInt, PetscScalar *, Mat *, PetscDeviceContext, bool) noexcept;
171: static PetscErrorCode SetPreallocation(Mat, PetscDeviceContext, PetscScalar *) noexcept;
173: template <typename F>
174: static PetscErrorCode DiagonalUnaryTransform(Mat, PetscDeviceContext, F &&) noexcept;
176: static PetscErrorCode Shift(Mat, PetscScalar) noexcept;
177: static PetscErrorCode GetDiagonal(Mat, Vec) noexcept;
179: PETSC_NODISCARD static auto DeviceArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ>{dctx, m})
180: PETSC_NODISCARD static auto DeviceArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
181: PETSC_NODISCARD static auto DeviceArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
182: PETSC_NODISCARD static auto HostArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ>{dctx, m})
183: PETSC_NODISCARD static auto HostArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
184: PETSC_NODISCARD static auto HostArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
185: };
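// Note on the CRTP: Derived (aliased D in the definitions below) is the concrete implementation
// class, i.e. MatDense_Seq_CUPM<T> or MatDense_MPI_CUPM<T>. The generic routines here reach the
// implementation-specific pieces through static dispatch: MatIMPLCast() forwards to
// U::MatIMPLCast_(), MATIMPLCUPM() to D::MATIMPLCUPM_(), and MatrixArray's constructor calls
// D::template GetArray<MT, MA>(), so no virtual functions are required.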
187: // ==========================================================================================
188: // MatDense_CUPM::MatrixArray
189: // ==========================================================================================
191: template <device::cupm::DeviceType T, typename D>
192: template <PetscMemType MT, PetscMemoryAccessMode MA>
193: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM<T, D>::MatrixArray : public device::cupm::impl::RestoreableArray<T, MT, MA> {
194: using base_type = device::cupm::impl::RestoreableArray<T, MT, MA>;
196: public:
197: MatrixArray(PetscDeviceContext, Mat) noexcept;
198: ~MatrixArray() noexcept;
200: // must declare move constructor since we declare a destructor
201: constexpr MatrixArray(MatrixArray &&) noexcept;
203: private:
204: Mat m_ = nullptr;
205: };
207: // ==========================================================================================
208: // MatDense_CUPM::MatrixArray -- Public API
209: // ==========================================================================================
211: template <device::cupm::DeviceType T, typename D>
212: template <PetscMemType MT, PetscMemoryAccessMode MA>
213: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(PetscDeviceContext dctx, Mat m) noexcept : base_type{dctx}, m_{m}
214: {
215: PetscFunctionBegin;
216: PetscCallAbort(PETSC_COMM_SELF, D::template GetArray<MT, MA>(m, &this->ptr_, dctx));
217: PetscFunctionReturnVoid();
218: }
220: template <device::cupm::DeviceType T, typename D>
221: template <PetscMemType MT, PetscMemoryAccessMode MA>
222: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::~MatrixArray() noexcept
223: {
224: PetscFunctionBegin;
225: PetscCallAbort(PETSC_COMM_SELF, D::template RestoreArray<MT, MA>(m_, &this->ptr_, this->dctx_));
226: PetscFunctionReturnVoid();
227: }
229: template <device::cupm::DeviceType T, typename D>
230: template <PetscMemType MT, PetscMemoryAccessMode MA>
231: inline constexpr MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(MatrixArray &&other) noexcept : base_type{std::move(other)}, m_{util::exchange(other.m_, nullptr)}
232: {
233: }
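// MatrixArray is an RAII handle: the constructor calls D::GetArray<MT, MA>() and the destructor
// calls D::RestoreArray<MT, MA>(). A typical (sketched) use is therefore scope-based:
//
//   {
//     const auto da = DeviceArrayRead(dctx, A); // array acquired here
//     // ... use da.data() ...
//   }                                           // array restored when da goes out of scope
//
// This is exactly the pattern used by DiagonalUnaryTransform() and GetDiagonal() below.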
235: // ==========================================================================================
236: // MatDense_CUPM -- Private API
237: // ==========================================================================================
239: template <device::cupm::DeviceType T, typename D>
240: inline PetscErrorCode MatDense_CUPM<T, D>::CheckSaneSequentialMatSizes_(Mat A) noexcept
241: {
242: PetscFunctionBegin;
243: if (PetscDefined(USE_DEBUG)) {
244: PetscBool isseq;
246: PetscCall(PetscObjectTypeCompare(PetscObjectCast(A), D::MATSEQDENSECUPM(), &isseq));
247: if (isseq) {
248: // doing this check allows both sequential and parallel implementations to just pass in
249: // A, otherwise they would need to specify rstart, rend, and cols separately!
250: PetscCheck(A->rmap->rstart == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix row start %" PetscInt_FMT " != 0?", A->rmap->rstart);
251: PetscCheck(A->rmap->rend == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix row end %" PetscInt_FMT " != number of rows %" PetscInt_FMT, A->rmap->rend, A->rmap->n);
252: PetscCheck(A->cmap->n == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix number of local columns %" PetscInt_FMT " != number of global columns %" PetscInt_FMT, A->cmap->n, A->cmap->N);
253: }
254: }
255: PetscFunctionReturn(PETSC_SUCCESS);
256: }
258: // ==========================================================================================
259: // MatDense_CUPM -- Protected API
260: // ==========================================================================================
262: template <device::cupm::DeviceType T, typename D>
263: inline constexpr MatType MatDense_CUPM<T, D>::MATIMPLCUPM() noexcept
264: {
265: return D::MATIMPLCUPM_();
266: }
268: // Common core for MatCreateSeqDenseCUPM() and MatCreateMPIDenseCUPM()
269: template <device::cupm::DeviceType T, typename D>
270: inline PetscErrorCode MatDense_CUPM<T, D>::CreateIMPLDenseCUPM(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A, PetscDeviceContext dctx, bool preallocate) noexcept
271: {
272: Mat mat;
274: PetscFunctionBegin;
275: PetscAssertPointer(A, 7);
276: PetscCall(MatCreate(comm, &mat));
277: PetscCall(MatSetSizes(mat, m, n, M, N));
278: PetscCall(MatSetType(mat, D::MATIMPLCUPM()));
279: if (preallocate) {
280: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
281: PetscCall(D::SetPreallocation(mat, dctx, data));
282: }
283: *A = mat;
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
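// Usage sketch (illustrative only; the real creation wrappers handle more options): a routine such
// as MatCreateSeqDenseCUPM() can funnel into this core as
//
//   PetscCall(MatDense_Seq_CUPM<device::cupm::DeviceType::CUDA>::CreateIMPLDenseCUPM(PETSC_COMM_SELF, m, n, m, n, device_data, &A, nullptr, true));
//
// where preallocate = true makes the call go through D::SetPreallocation() with the user-provided
// device array.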
287: template <device::cupm::DeviceType T, typename D>
288: inline PetscErrorCode MatDense_CUPM<T, D>::SetPreallocation(Mat A, PetscDeviceContext dctx, PetscScalar *device_array) noexcept
289: {
290: PetscFunctionBegin;
292: // A might be the local (sequential) matrix of a MatDense_MPI_CUPM. Since this routine can be
293: // called from the MPI matrix's implementation, MATIMPLCUPM() would then return MATMPIDENSECUPM().
295: PetscCheckTypeNames(A, D::MATSEQDENSECUPM(), D::MATMPIDENSECUPM());
296: PetscCall(PetscLayoutSetUp(A->rmap));
297: PetscCall(PetscLayoutSetUp(A->cmap));
298: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
299: PetscCall(D::SetPreallocation_(A, dctx, device_array));
300: A->preallocated = PETSC_TRUE;
301: A->assembled = PETSC_TRUE;
302: PetscFunctionReturn(PETSC_SUCCESS);
303: }
305: namespace detail
306: {
308: // ==========================================================================================
309: // MatrixIteratorBase
310: //
311: // A base class for creating thrust iterators over the local sub-matrix. It sets up the
312: // iterator typedefs thrust needs to traverse the permuted range. Template
313: // parameters are as follows:
314: //
315: // - Iterator:
316: // The type of the primary array iterator. Usually this is
317: // thrust::device_vector<T>::iterator (built from thrust::device_pointer_cast() in MakeDiagonalIterator() below).
318: //
319: // - IndexFunctor:
320: // This should be a functor which contains an operator() that when called with an index `i`,
321: // returns the i'th permuted index into the array. For example, it could return the index of
322: // the i'th diagonal entry.
323: // ==========================================================================================
324: template <typename Iterator, typename IndexFunctor>
325: class MatrixIteratorBase {
326: public:
327: using array_iterator_type = Iterator;
328: using index_functor_type = IndexFunctor;
330: using difference_type = typename thrust::iterator_difference<array_iterator_type>::type;
331: using CountingIterator = thrust::counting_iterator<difference_type>;
332: using TransformIterator = thrust::transform_iterator<index_functor_type, CountingIterator>;
333: using PermutationIterator = thrust::permutation_iterator<array_iterator_type, TransformIterator>;
334: using iterator = PermutationIterator; // type of the begin/end iterator
336: constexpr MatrixIteratorBase(array_iterator_type first, array_iterator_type last, index_functor_type idx_func) noexcept : first{std::move(first)}, last{std::move(last)}, func{std::move(idx_func)} { }
338: PETSC_NODISCARD iterator begin() const noexcept
339: {
340: return PermutationIterator{
341: first, TransformIterator{CountingIterator{0}, func}
342: };
343: }
345: protected:
346: array_iterator_type first;
347: array_iterator_type last;
348: index_functor_type func;
349: };
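// How the iterator chain composes: begin() wraps the primary iterator `first` in a
// permutation_iterator whose index sequence is transform_iterator(counting_iterator(0), func), so
// dereferencing position k yields first[func(k)]. With the StridedIndexFunctor below this becomes
// first[stride * k], i.e. every stride-th element of the underlying array.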
351: // ==========================================================================================
352: // StridedIndexFunctor
353: //
354: // Functor which permutes a linear index range into strided matrix indices. Usually used to
355: // get the diagonal.
356: // ==========================================================================================
357: template <typename T>
358: struct StridedIndexFunctor {
359: PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr T operator()(const T &i) const noexcept { return stride * i; }
361: T stride;
362: };
364: template <typename Iterator>
365: class DiagonalIterator : public MatrixIteratorBase<Iterator, StridedIndexFunctor<typename thrust::iterator_difference<Iterator>::type>> {
366: public:
367: using base_type = MatrixIteratorBase<Iterator, StridedIndexFunctor<typename thrust::iterator_difference<Iterator>::type>>;
369: using difference_type = typename base_type::difference_type;
370: using iterator = typename base_type::iterator;
372: constexpr DiagonalIterator(Iterator first, Iterator last, difference_type stride) noexcept : base_type{std::move(first), std::move(last), {stride}} { }
374: PETSC_NODISCARD iterator end() const noexcept { return this->begin() + (this->last - this->first + this->func.stride - 1) / this->func.stride; }
375: };
377: template <typename T>
378: inline DiagonalIterator<typename thrust::device_vector<T>::iterator> MakeDiagonalIterator(T *data, PetscInt rstart, PetscInt rend, PetscInt cols, PetscInt lda) noexcept
379: {
380: const auto rend2 = std::min(rend, cols);
381: const std::size_t begin = rstart * lda;
382: const std::size_t end = rend2 - rstart + rend2 * lda;
383: const auto dptr = thrust::device_pointer_cast(data);
385: return {dptr + begin, dptr + end, lda + 1};
386: }
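// Index arithmetic behind MakeDiagonalIterator() (column-major storage, leading dimension lda):
// the global diagonal entry i, for rstart <= i < min(rend, cols), lives in the local array at
// offset (i - rstart) + i * lda. The first such entry is therefore at rstart * lda, consecutive
// entries are lda + 1 apart, and the iterator visits exactly min(rend, cols) - rstart of them.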
388: } // namespace detail
390: template <device::cupm::DeviceType T, typename D>
391: template <typename F>
392: inline PetscErrorCode MatDense_CUPM<T, D>::DiagonalUnaryTransform(Mat A, PetscDeviceContext dctx, F &&functor) noexcept
393: {
394: const auto rstart = A->rmap->rstart;
395: const auto rend = A->rmap->rend;
396: const auto gcols = A->cmap->N;
397: const auto rend2 = std::min(rend, gcols);
399: PetscFunctionBegin;
400: PetscCall(CheckSaneSequentialMatSizes_(A));
401: if (rend2 > rstart) {
402: const auto da = D::DeviceArrayReadWrite(dctx, A);
403: cupmStream_t stream;
404: PetscInt lda;
406: PetscCall(MatDenseGetLDA(A, &lda));
407: PetscCall(D::GetHandlesFrom_(dctx, &stream));
408: {
409: auto diagonal = detail::MakeDiagonalIterator(da.data(), rstart, rend, gcols, lda);
411: // clang-format off
412: PetscCallThrust(
413: THRUST_CALL(
414: thrust::transform,
415: stream,
416: diagonal.begin(), diagonal.end(), diagonal.begin(),
417: std::forward<F>(functor)
418: )
419: );
420: // clang-format on
421: }
422: PetscCall(PetscLogGpuFlops(rend2 - rstart));
423: }
424: PetscFunctionReturn(PETSC_SUCCESS);
425: }
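// Example: Shift() below is the canonical caller, passing make_plus_equals(alpha) so that each
// diagonal entry d_i becomes d_i + alpha in a single thrust::transform over the strided diagonal
// range.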
427: template <device::cupm::DeviceType T, typename D>
428: inline PetscErrorCode MatDense_CUPM<T, D>::Shift(Mat A, PetscScalar alpha) noexcept
429: {
430: PetscDeviceContext dctx;
432: PetscFunctionBegin;
433: PetscCall(GetHandles_(&dctx));
434: PetscCall(DiagonalUnaryTransform(A, dctx, device::cupm::functors::make_plus_equals(alpha)));
435: PetscFunctionReturn(PETSC_SUCCESS);
436: }
438: template <device::cupm::DeviceType T, typename D>
439: inline PetscErrorCode MatDense_CUPM<T, D>::GetDiagonal(Mat A, Vec v) noexcept
440: {
441: const auto rstart = A->rmap->rstart;
442: const auto rend = A->rmap->rend;
443: const auto gcols = A->cmap->N;
444: PetscInt lda;
445: PetscDeviceContext dctx;
447: PetscFunctionBegin;
448: PetscCall(CheckSaneSequentialMatSizes_(A));
449: PetscCall(GetHandles_(&dctx));
450: PetscCall(MatDenseGetLDA(A, &lda));
451: {
452: auto dv = VecSeq_CUPM::DeviceArrayWrite(dctx, v);
453: auto da = D::DeviceArrayRead(dctx, A);
454: auto diagonal = detail::MakeDiagonalIterator(da.data(), rstart, rend, gcols, lda);
455: // We must perform this cast outside of THRUST_CALL(). Without it, GCC 6.4 through 7.5 and 11.3.0
456: // emit spurious warnings:
457: //
458: // warning: 'MatDense_CUPM<...>::GetDiagonal(Mat, Vec)::' declared with greater
459: // visibility than the type of its field 'MatDense_CUPM<...>::GetDiagonal(Mat,
460: // Vec)::::' [-Wattributes]
461: // 460 | PetscCallThrust(
462: // | ^~~~~~~~~~~~~~~~
463: auto dvp = thrust::device_pointer_cast(dv.data());
464: cupmStream_t stream;
466: PetscCall(GetHandlesFrom_(dctx, &stream));
467: PetscCallThrust(THRUST_CALL(thrust::copy, stream, diagonal.begin(), diagonal.end(), dvp));
468: }
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
472: #define MatComposeOp_CUPM(use_host, pobj, op_str, op_host, ...) \
473: do { \
474: if (use_host) { \
475: PetscCall(PetscObjectComposeFunction(pobj, op_str, op_host)); \
476: } else { \
477: PetscCall(PetscObjectComposeFunction(pobj, op_str, __VA_ARGS__)); \
478: } \
479: } while (0)
481: #define MatSetOp_CUPM(use_host, mat, op_name, op_host, ...) \
482: do { \
483: if (use_host) { \
484: (mat)->ops->op_name = op_host; \
485: } else { \
486: (mat)->ops->op_name = __VA_ARGS__; \
487: } \
488: } while (0)
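// Usage sketch for the two helpers above (MatShift_Host and MatShift_Device are placeholder names,
// not real PETSc routines): the implementation picks between host and device callbacks in one
// place, e.g.
//
//   MatSetOp_CUPM(usehost, A, shift, MatShift_Host, MatShift_Device<T>);
//   MatComposeOp_CUPM(usehost, PetscObjectCast(A), "SomeOp_C", SomeOp_Host, SomeOp_Device<T>);
//
// where usehost is a PetscBool indicating that the matrix is currently bound to the CPU.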
490: #define MATDENSECUPM_HEADER(T, ...) \
491: MATDENSECUPM_BASE_HEADER(T); \
492: friend class ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>; \
493: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MatIMPLCast; \
494: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MATIMPLCUPM; \
495: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::CreateIMPLDenseCUPM; \
496: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::SetPreallocation; \
497: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayRead; \
498: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayWrite; \
499: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayReadWrite; \
500: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayRead; \
501: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayWrite; \
502: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayReadWrite; \
503: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DiagonalUnaryTransform; \
504: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::Shift; \
505: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::GetDiagonal
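// Sketch of how a concrete implementation class is expected to use this macro (heavily simplified;
// the real classes declare many more members):
//
//   template <device::cupm::DeviceType T>
//   class MatDense_Seq_CUPM : public MatDense_CUPM<T, MatDense_Seq_CUPM<T>> {
//   public:
//     MATDENSECUPM_HEADER(T, MatDense_Seq_CUPM<T>);
//     // ... sequential-dense-specific routines ...
//   };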
507: } // namespace impl
509: // ==========================================================================================
510: // MatDense_CUPM -- Implementations
511: // ==========================================================================================
513: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
514: inline PetscErrorCode MatDenseCUPMGetArray_Private(Mat A, PetscScalar **array) noexcept
515: {
516: PetscFunctionBegin;
518: PetscAssertPointer(array, 2);
519: switch (access) {
520: case PETSC_MEMORY_ACCESS_READ:
521: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C(), (Mat, PetscScalar **), (A, array));
522: break;
523: case PETSC_MEMORY_ACCESS_WRITE:
524: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C(), (Mat, PetscScalar **), (A, array));
525: break;
526: case PETSC_MEMORY_ACCESS_READ_WRITE:
527: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C(), (Mat, PetscScalar **), (A, array));
528: break;
529: }
530: if (PetscMemoryAccessWrite(access)) PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
531: PetscFunctionReturn(PETSC_SUCCESS);
532: }
534: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
535: inline PetscErrorCode MatDenseCUPMRestoreArray_Private(Mat A, PetscScalar **array) noexcept
536: {
537: PetscFunctionBegin;
539: if (array) PetscAssertPointer(array, 2);
540: switch (access) {
541: case PETSC_MEMORY_ACCESS_READ:
542: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C(), (Mat, PetscScalar **), (A, array));
543: break;
544: case PETSC_MEMORY_ACCESS_WRITE:
545: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C(), (Mat, PetscScalar **), (A, array));
546: break;
547: case PETSC_MEMORY_ACCESS_READ_WRITE:
548: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C(), (Mat, PetscScalar **), (A, array));
549: break;
550: }
551: if (PetscMemoryAccessWrite(access)) {
552: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
553: A->offloadmask = PETSC_OFFLOAD_GPU;
554: }
555: if (array) *array = nullptr;
556: PetscFunctionReturn(PETSC_SUCCESS);
557: }
559: template <device::cupm::DeviceType T>
560: inline PetscErrorCode MatDenseCUPMGetArray(Mat A, PetscScalar **array) noexcept
561: {
562: PetscFunctionBegin;
563: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: template <device::cupm::DeviceType T>
568: inline PetscErrorCode MatDenseCUPMGetArrayRead(Mat A, const PetscScalar **array) noexcept
569: {
570: PetscFunctionBegin;
571: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: template <device::cupm::DeviceType T>
576: inline PetscErrorCode MatDenseCUPMGetArrayWrite(Mat A, PetscScalar **array) noexcept
577: {
578: PetscFunctionBegin;
579: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
580: PetscFunctionReturn(PETSC_SUCCESS);
581: }
583: template <device::cupm::DeviceType T>
584: inline PetscErrorCode MatDenseCUPMRestoreArray(Mat A, PetscScalar **array) noexcept
585: {
586: PetscFunctionBegin;
587: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
588: PetscFunctionReturn(PETSC_SUCCESS);
589: }
591: template <device::cupm::DeviceType T>
592: inline PetscErrorCode MatDenseCUPMRestoreArrayRead(Mat A, const PetscScalar **array) noexcept
593: {
594: PetscFunctionBegin;
595: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
596: PetscFunctionReturn(PETSC_SUCCESS);
597: }
599: template <device::cupm::DeviceType T>
600: inline PetscErrorCode MatDenseCUPMRestoreArrayWrite(Mat A, PetscScalar **array) noexcept
601: {
602: PetscFunctionBegin;
603: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
604: PetscFunctionReturn(PETSC_SUCCESS);
605: }
607: template <device::cupm::DeviceType T>
608: inline PetscErrorCode MatDenseCUPMPlaceArray(Mat A, const PetscScalar *array) noexcept
609: {
610: PetscFunctionBegin;
612: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C(), (Mat, const PetscScalar *), (A, array));
613: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
614: A->offloadmask = PETSC_OFFLOAD_GPU;
615: PetscFunctionReturn(PETSC_SUCCESS);
616: }
618: template <device::cupm::DeviceType T>
619: inline PetscErrorCode MatDenseCUPMReplaceArray(Mat A, const PetscScalar *array) noexcept
620: {
621: PetscFunctionBegin;
623: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C(), (Mat, const PetscScalar *), (A, array));
624: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
625: A->offloadmask = PETSC_OFFLOAD_GPU;
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
629: template <device::cupm::DeviceType T>
630: inline PetscErrorCode MatDenseCUPMResetArray(Mat A) noexcept
631: {
632: PetscFunctionBegin;
634: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C(), (Mat), (A));
635: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
636: PetscFunctionReturn(PETSC_SUCCESS);
637: }
639: template <device::cupm::DeviceType T>
640: inline PetscErrorCode MatDenseCUPMSetPreallocation(Mat A, PetscScalar *device_data, PetscDeviceContext dctx = nullptr) noexcept
641: {
642: PetscFunctionBegin;
644: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMSetPreallocation_C(), (Mat, PetscDeviceContext, PetscScalar *), (A, dctx, device_data));
645: PetscFunctionReturn(PETSC_SUCCESS);
646: }
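// End-to-end usage sketch for the public wrappers above (assumes a CUDA build; the HIP spelling is
// analogous, and passing a null array to SetPreallocation is assumed to request internally managed
// device storage):
//
//   constexpr auto DT = device::cupm::DeviceType::CUDA;
//   PetscScalar   *a;
//
//   PetscCall(MatDenseCUPMSetPreallocation<DT>(A, nullptr));
//   PetscCall(MatDenseCUPMGetArrayWrite<DT>(A, &a));
//   // ... fill a on the device ...
//   PetscCall(MatDenseCUPMRestoreArrayWrite<DT>(A, &a)); // marks A as up to date on the GPU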
648: } // namespace cupm
650: } // namespace mat
652: } // namespace Petsc
654: #endif // __cplusplus