Actual source code: matdensecupmimpl.h
1: #pragma once
3: #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
4: #include <petsc/private/matimpl.h>
6: #ifdef __cplusplus
7: #include <petsc/private/deviceimpl.h>
8: #include <petsc/private/cupmsolverinterface.hpp>
9: #include <petsc/private/cupmobject.hpp>
11: #include "../src/sys/objects/device/impls/cupm/cupmthrustutility.hpp"
12: #include "../src/sys/objects/device/impls/cupm/kernels.hpp"
14: #include <thrust/device_vector.h>
15: #include <thrust/device_ptr.h>
16: #include <thrust/iterator/counting_iterator.h>
17: #include <thrust/iterator/transform_iterator.h>
18: #include <thrust/iterator/permutation_iterator.h>
19: #include <thrust/transform.h>
20: #include <thrust/copy.h>
22: namespace Petsc
23: {
25: namespace vec
26: {
28: namespace cupm
29: {
31: namespace impl
32: {
34: template <device::cupm::DeviceType>
35: class VecSeq_CUPM;
36: template <device::cupm::DeviceType>
37: class VecMPI_CUPM;
39: } // namespace impl
41: } // namespace cupm
43: } // namespace vec
45: namespace mat
46: {
48: namespace cupm
49: {
51: namespace impl
52: {
54: // ==========================================================================================
55: // MatDense_CUPM_Base
56: //
57: // A base class to separate out the CRTP code from the common CUPM stuff (like the composed
58: // function names).
59: // ==========================================================================================
61: template <device::cupm::DeviceType T>
62: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM_Base : protected device::cupm::impl::CUPMObject<T> {
63: public:
64: PETSC_CUPMOBJECT_HEADER(T);
66: #define MatDenseCUPMComposedOpDecl(OP_NAME) \
67: PETSC_NODISCARD static constexpr const char *PetscConcat(MatDenseCUPM, OP_NAME)() noexcept \
68: { \
69: return T == device::cupm::DeviceType::CUDA ? PetscStringize(PetscConcat(MatDenseCUDA, OP_NAME)) : PetscStringize(PetscConcat(MatDenseHIP, OP_NAME)); \
70: }
72: // clang-format off
73: MatDenseCUPMComposedOpDecl(GetArray_C)
74: MatDenseCUPMComposedOpDecl(GetArrayRead_C)
75: MatDenseCUPMComposedOpDecl(GetArrayWrite_C)
76: MatDenseCUPMComposedOpDecl(RestoreArray_C)
77: MatDenseCUPMComposedOpDecl(RestoreArrayRead_C)
78: MatDenseCUPMComposedOpDecl(RestoreArrayWrite_C)
79: MatDenseCUPMComposedOpDecl(PlaceArray_C)
80: MatDenseCUPMComposedOpDecl(ReplaceArray_C)
81: MatDenseCUPMComposedOpDecl(ResetArray_C)
82: MatDenseCUPMComposedOpDecl(SetPreallocation_C)
83: // clang-format on
85: #undef MatDenseCUPMComposedOpDecl
87: PETSC_NODISCARD static constexpr MatType MATSEQDENSECUPM() noexcept;
88: PETSC_NODISCARD static constexpr MatType MATMPIDENSECUPM() noexcept;
89: PETSC_NODISCARD static constexpr MatType MATDENSECUPM() noexcept;
90: PETSC_NODISCARD static constexpr MatSolverType MATSOLVERCUPM() noexcept;
91: };
93: // ==========================================================================================
94: // MatDense_CUPM_Base -- Public API
95: // ==========================================================================================
97: template <device::cupm::DeviceType T>
98: inline constexpr MatType MatDense_CUPM_Base<T>::MATSEQDENSECUPM() noexcept
99: {
100: return T == device::cupm::DeviceType::CUDA ? MATSEQDENSECUDA : MATSEQDENSEHIP;
101: }
103: template <device::cupm::DeviceType T>
104: inline constexpr MatType MatDense_CUPM_Base<T>::MATMPIDENSECUPM() noexcept
105: {
106: return T == device::cupm::DeviceType::CUDA ? MATMPIDENSECUDA : MATMPIDENSEHIP;
107: }
109: template <device::cupm::DeviceType T>
110: inline constexpr MatType MatDense_CUPM_Base<T>::MATDENSECUPM() noexcept
111: {
112: return T == device::cupm::DeviceType::CUDA ? MATDENSECUDA : MATDENSEHIP;
113: }
115: template <device::cupm::DeviceType T>
116: inline constexpr MatSolverType MatDense_CUPM_Base<T>::MATSOLVERCUPM() noexcept
117: {
118: return T == device::cupm::DeviceType::CUDA ? MATSOLVERCUDA : MATSOLVERHIP;
119: }
121: #define MATDENSECUPM_BASE_HEADER(T) \
122: PETSC_CUPMOBJECT_HEADER(T); \
123: using VecSeq_CUPM = ::Petsc::vec::cupm::impl::VecSeq_CUPM<T>; \
124: using VecMPI_CUPM = ::Petsc::vec::cupm::impl::VecMPI_CUPM<T>; \
125: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSEQDENSECUPM; \
126: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATMPIDENSECUPM; \
127: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATDENSECUPM; \
128: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSOLVERCUPM; \
129: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C; \
130: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C; \
131: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C; \
132: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C; \
133: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C; \
134: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C; \
135: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C; \
136: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C; \
137: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C; \
138: using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMSetPreallocation_C
140: // forward declare
141: template <device::cupm::DeviceType>
142: class MatDense_Seq_CUPM;
143: template <device::cupm::DeviceType>
144: class MatDense_MPI_CUPM;
146: // ==========================================================================================
147: // MatDense_CUPM
148: //
149: // The true "base" class for MatDenseCUPM. The reason MatDense_CUPM and MatDense_CUPM_Base
150: // exist is to separate out the CRTP code from the non-crtp code so that the generic functions
151: // can be called via templates below.
152: // ==========================================================================================
154: template <device::cupm::DeviceType T, typename Derived>
155: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM : protected MatDense_CUPM_Base<T> {
156: private:
157: static PetscErrorCode CheckSaneSequentialMatSizes_(Mat) noexcept;
159: protected:
160: MATDENSECUPM_BASE_HEADER(T);
162: template <PetscMemType, PetscMemoryAccessMode>
163: class MatrixArray;
165: // Cast the Mat to its host struct, i.e. return the result of (Mat_SeqDense *)m->data
166: template <typename U = Derived>
167: PETSC_NODISCARD static constexpr auto MatIMPLCast(Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(U::MatIMPLCast_(m))
168: PETSC_NODISCARD static constexpr MatType MATIMPLCUPM() noexcept;
170: static PetscErrorCode CreateIMPLDenseCUPM(MPI_Comm, PetscInt, PetscInt, PetscInt, PetscInt, PetscScalar *, Mat *, PetscDeviceContext, bool) noexcept;
171: static PetscErrorCode SetPreallocation(Mat, PetscDeviceContext, PetscScalar *) noexcept;
173: template <typename F>
174: static PetscErrorCode DiagonalUnaryTransform(Mat, PetscDeviceContext, F &&) noexcept;
176: static PetscErrorCode Shift(Mat, PetscScalar) noexcept;
177: static PetscErrorCode GetDiagonal(Mat, Vec) noexcept;
179: PETSC_NODISCARD static auto DeviceArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ>{dctx, m})
180: PETSC_NODISCARD static auto DeviceArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
181: PETSC_NODISCARD static auto DeviceArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
182: PETSC_NODISCARD static auto HostArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ>{dctx, m})
183: PETSC_NODISCARD static auto HostArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
184: PETSC_NODISCARD static auto HostArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
185: };
187: // ==========================================================================================
188: // MatDense_CUPM::MatrixArray
189: // ==========================================================================================
191: template <device::cupm::DeviceType T, typename D>
192: template <PetscMemType MT, PetscMemoryAccessMode MA>
193: class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_CUPM<T, D>::MatrixArray : public device::cupm::impl::RestoreableArray<T, MT, MA> {
194: using base_type = device::cupm::impl::RestoreableArray<T, MT, MA>;
196: public:
197: MatrixArray(PetscDeviceContext, Mat) noexcept;
198: ~MatrixArray() noexcept;
200: // must declare move constructor since we declare a destructor
201: constexpr MatrixArray(MatrixArray &&) noexcept;
203: private:
204: Mat m_ = nullptr;
205: };
207: // ==========================================================================================
208: // MatDense_CUPM::MatrixArray -- Public API
209: // ==========================================================================================
211: template <device::cupm::DeviceType T, typename D>
212: template <PetscMemType MT, PetscMemoryAccessMode MA>
213: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(PetscDeviceContext dctx, Mat m) noexcept : base_type{dctx}, m_{m}
214: {
215: PetscFunctionBegin;
216: PetscCallAbort(PETSC_COMM_SELF, D::template GetArray<MT, MA>(m, &this->ptr_, dctx));
217: PetscFunctionReturnVoid();
218: }
220: template <device::cupm::DeviceType T, typename D>
221: template <PetscMemType MT, PetscMemoryAccessMode MA>
222: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::~MatrixArray() noexcept
223: {
224: PetscFunctionBegin;
225: PetscCallAbort(PETSC_COMM_SELF, D::template RestoreArray<MT, MA>(m_, &this->ptr_, this->dctx_));
226: PetscFunctionReturnVoid();
227: }
229: template <device::cupm::DeviceType T, typename D>
230: template <PetscMemType MT, PetscMemoryAccessMode MA>
231: inline constexpr MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(MatrixArray &&other) noexcept : base_type{std::move(other)}, m_{util::exchange(other.m_, nullptr)}
232: {
233: }
235: // ==========================================================================================
236: // MatDense_CUPM -- Private API
237: // ==========================================================================================
239: template <device::cupm::DeviceType T, typename D>
240: inline PetscErrorCode MatDense_CUPM<T, D>::CheckSaneSequentialMatSizes_(Mat A) noexcept
241: {
242: PetscFunctionBegin;
243: if (PetscDefined(USE_DEBUG)) {
244: PetscBool isseq;
246: PetscCall(PetscObjectTypeCompare(PetscObjectCast(A), D::MATSEQDENSECUPM(), &isseq));
247: if (isseq) {
248: // doing this check allows both sequential and parallel implementations to just pass in
249: // A, otherwise they would need to specify rstart, rend, and cols separately!
250: PetscCheck(A->rmap->rstart == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix row start %" PetscInt_FMT " != 0?", A->rmap->rstart);
251: PetscCheck(A->rmap->rend == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix row end %" PetscInt_FMT " != number of rows %" PetscInt_FMT, A->rmap->rend, A->rmap->n);
252: PetscCheck(A->cmap->n == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sequential matrix number of local columns %" PetscInt_FMT " != number of global columns %" PetscInt_FMT, A->cmap->n, A->cmap->n);
253: }
254: }
255: PetscFunctionReturn(PETSC_SUCCESS);
256: }
258: // ==========================================================================================
259: // MatDense_CUPM -- Protected API
260: // ==========================================================================================
262: template <device::cupm::DeviceType T, typename D>
263: inline constexpr MatType MatDense_CUPM<T, D>::MATIMPLCUPM() noexcept
264: {
265: return D::MATIMPLCUPM_();
266: }
268: // Common core for MatCreateSeqDenseCUPM() and MatCreateMPIDenseCUPM()
269: template <device::cupm::DeviceType T, typename D>
270: inline PetscErrorCode MatDense_CUPM<T, D>::CreateIMPLDenseCUPM(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A, PetscDeviceContext dctx, bool preallocate) noexcept
271: {
272: Mat mat;
274: PetscFunctionBegin;
275: PetscAssertPointer(A, 7);
276: PetscCall(MatCreate(comm, &mat));
277: PetscCall(MatSetSizes(mat, m, n, M, N));
278: PetscCall(MatSetType(mat, D::MATIMPLCUPM()));
279: if (preallocate) {
280: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
281: PetscCall(D::SetPreallocation(mat, dctx, data));
282: }
283: *A = mat;
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
287: template <device::cupm::DeviceType T, typename D>
288: inline PetscErrorCode MatDense_CUPM<T, D>::SetPreallocation(Mat A, PetscDeviceContext dctx, PetscScalar *device_array) noexcept
289: {
290: PetscFunctionBegin;
292: // might be the local (sequential) matrix of a MatMPIDense_CUPM. Since this would be called
293: // from the MPI matrix'es impl MATIMPLCUPM() would return MATMPIDENSECUPM().
295: PetscCheckTypeNames(A, D::MATSEQDENSECUPM(), D::MATMPIDENSECUPM());
296: PetscCall(PetscLayoutSetUp(A->rmap));
297: PetscCall(PetscLayoutSetUp(A->cmap));
298: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
299: PetscCall(D::SetPreallocation_(A, dctx, device_array));
300: A->preallocated = PETSC_TRUE;
301: A->assembled = PETSC_TRUE;
302: PetscFunctionReturn(PETSC_SUCCESS);
303: }
305: namespace detail
306: {
308: #if CCCL_VERSION >= 3000000
309: template <class T>
310: using iter_difference_t = cuda::std::iter_difference_t<T>;
311: #else
312: template <class T>
313: using iter_difference_t = typename thrust::iterator_difference<T>::type;
314: #endif
316: // ==========================================================================================
317: // MatrixIteratorBase
318: //
319: // A base class for creating thrust iterators over the local sub-matrix. This will set up the
320: // proper iterator definitions so thrust knows how to handle things properly. Template
321: // parameters are as follows:
322: //
323: // - Iterator:
324: // The type of the primary array iterator. Usually this is
325: // thrust::device_pointer::iterator.
326: //
327: // - IndexFunctor:
328: // This should be a functor which contains an operator() that when called with an index `i`,
329: // returns the i'th permuted index into the array. For example, it could return the i'th
330: // diagonal entry.
331: // ==========================================================================================
332: template <typename Iterator, typename IndexFunctor>
333: class MatrixIteratorBase {
334: public:
335: using array_iterator_type = Iterator;
336: using index_functor_type = IndexFunctor;
338: using difference_type = iter_difference_t<array_iterator_type>;
339: using CountingIterator = thrust::counting_iterator<difference_type>;
340: using TransformIterator = thrust::transform_iterator<index_functor_type, CountingIterator>;
341: using PermutationIterator = thrust::permutation_iterator<array_iterator_type, TransformIterator>;
342: using iterator = PermutationIterator; // type of the begin/end iterator
344: constexpr MatrixIteratorBase(array_iterator_type first, array_iterator_type last, index_functor_type idx_func) noexcept : first{std::move(first)}, last{std::move(last)}, func{std::move(idx_func)} { }
346: PETSC_NODISCARD iterator begin() const noexcept
347: {
348: return PermutationIterator{
349: first, TransformIterator{CountingIterator{0}, func}
350: };
351: }
353: protected:
354: array_iterator_type first;
355: array_iterator_type last;
356: index_functor_type func;
357: };
359: // ==========================================================================================
360: // StridedIndexFunctor
361: //
362: // Iterator which permutes a linear index range into strided matrix indices. Usually used to
363: // get the diagonal.
364: // ==========================================================================================
365: template <typename T>
366: struct StridedIndexFunctor {
367: PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr T operator()(const T &i) const noexcept { return stride * i; }
369: T stride;
370: };
372: template <typename Iterator>
373: class DiagonalIterator : public MatrixIteratorBase<Iterator, StridedIndexFunctor<iter_difference_t<Iterator>>> {
374: public:
375: using base_type = MatrixIteratorBase<Iterator, StridedIndexFunctor<iter_difference_t<Iterator>>>;
377: using difference_type = typename base_type::difference_type;
378: using iterator = typename base_type::iterator;
380: constexpr DiagonalIterator(Iterator first, Iterator last, difference_type stride) noexcept : base_type{std::move(first), std::move(last), {stride}} { }
382: PETSC_NODISCARD iterator end() const noexcept { return this->begin() + (this->last - this->first + this->func.stride - 1) / this->func.stride; }
383: };
385: template <typename T>
386: inline DiagonalIterator<typename thrust::device_vector<T>::iterator> MakeDiagonalIterator(T *data, PetscInt rstart, PetscInt rend, PetscInt cols, PetscInt lda) noexcept
387: {
388: const auto rend2 = std::min(rend, cols);
389: const std::size_t begin = rstart * lda;
390: const std::size_t end = rend2 - rstart + rend2 * lda;
391: const auto dptr = thrust::device_pointer_cast(data);
393: return {dptr + begin, dptr + end, lda + 1};
394: }
396: } // namespace detail
398: template <device::cupm::DeviceType T, typename D>
399: template <typename F>
400: inline PetscErrorCode MatDense_CUPM<T, D>::DiagonalUnaryTransform(Mat A, PetscDeviceContext dctx, F &&functor) noexcept
401: {
402: const auto rstart = A->rmap->rstart;
403: const auto rend = A->rmap->rend;
404: const auto gcols = A->cmap->N;
405: const auto rend2 = std::min(rend, gcols);
407: PetscFunctionBegin;
408: PetscCall(CheckSaneSequentialMatSizes_(A));
409: if (rend2 > rstart) {
410: const auto da = D::DeviceArrayReadWrite(dctx, A);
411: cupmStream_t stream;
412: PetscInt lda;
414: PetscCall(MatDenseGetLDA(A, &lda));
415: PetscCall(D::GetHandlesFrom_(dctx, &stream));
416: {
417: auto diagonal = detail::MakeDiagonalIterator(da.data(), rstart, rend, gcols, lda);
419: // clang-format off
420: PetscCallThrust(
421: THRUST_CALL(
422: thrust::transform,
423: stream,
424: diagonal.begin(), diagonal.end(), diagonal.begin(),
425: std::forward<F>(functor)
426: )
427: );
428: // clang-format on
429: }
430: PetscCall(PetscLogGpuFlops(rend2 - rstart));
431: }
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }
435: template <device::cupm::DeviceType T, typename D>
436: inline PetscErrorCode MatDense_CUPM<T, D>::Shift(Mat A, PetscScalar alpha) noexcept
437: {
438: PetscDeviceContext dctx;
440: PetscFunctionBegin;
441: PetscCall(GetHandles_(&dctx));
442: PetscCall(DiagonalUnaryTransform(A, dctx, device::cupm::functors::make_plus_equals(alpha)));
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: template <device::cupm::DeviceType T, typename D>
447: inline PetscErrorCode MatDense_CUPM<T, D>::GetDiagonal(Mat A, Vec v) noexcept
448: {
449: const auto rstart = A->rmap->rstart;
450: const auto rend = A->rmap->rend;
451: const auto gcols = A->cmap->N;
452: PetscInt lda;
453: PetscDeviceContext dctx;
455: PetscFunctionBegin;
456: PetscCall(CheckSaneSequentialMatSizes_(A));
457: PetscCall(GetHandles_(&dctx));
458: PetscCall(MatDenseGetLDA(A, &lda));
459: {
460: auto dv = VecSeq_CUPM::DeviceArrayWrite(dctx, v);
461: auto da = D::DeviceArrayRead(dctx, A);
462: auto diagonal = detail::MakeDiagonalIterator(da.data(), rstart, rend, gcols, lda);
463: // We must have this cast outside of THRUST_CALL(). Without it, GCC 6.4 - 7.5, and 11.3.0
464: // throw spurious warnings:
465: //
466: // warning: 'MatDense_CUPM<...>::GetDiagonal(Mat, Vec)::' declared with greater
467: // visibility than the type of its field 'MatDense_CUPM<...>::GetDiagonal(Mat,
468: // Vec)::::' [-Wattributes]
469: // 460 | PetscCallThrust(
470: // | ^~~~~~~~~~~~~~~~
471: auto dvp = thrust::device_pointer_cast(dv.data());
472: cupmStream_t stream;
474: PetscCall(GetHandlesFrom_(dctx, &stream));
475: PetscCallThrust(THRUST_CALL(thrust::copy, stream, diagonal.begin(), diagonal.end(), dvp));
476: }
477: PetscFunctionReturn(PETSC_SUCCESS);
478: }
480: #define MatComposeOp_CUPM(use_host, pobj, op_str, op_host, ...) \
481: do { \
482: if (use_host) { \
483: PetscCall(PetscObjectComposeFunction(pobj, op_str, op_host)); \
484: } else { \
485: PetscCall(PetscObjectComposeFunction(pobj, op_str, __VA_ARGS__)); \
486: } \
487: } while (0)
489: #define MatSetOp_CUPM(use_host, mat, op_name, op_host, ...) \
490: do { \
491: if (use_host) { \
492: (mat)->ops->op_name = op_host; \
493: } else { \
494: (mat)->ops->op_name = __VA_ARGS__; \
495: } \
496: } while (0)
498: #define MATDENSECUPM_HEADER(T, ...) \
499: MATDENSECUPM_BASE_HEADER(T); \
500: friend class ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>; \
501: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MatIMPLCast; \
502: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MATIMPLCUPM; \
503: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::CreateIMPLDenseCUPM; \
504: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::SetPreallocation; \
505: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayRead; \
506: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayWrite; \
507: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayReadWrite; \
508: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayRead; \
509: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayWrite; \
510: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayReadWrite; \
511: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DiagonalUnaryTransform; \
512: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::Shift; \
513: using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::GetDiagonal
515: } // namespace impl
517: // ==========================================================================================
518: // MatDense_CUPM -- Implementations
519: // ==========================================================================================
521: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
522: inline PetscErrorCode MatDenseCUPMGetArray_Private(Mat A, PetscScalar **array) noexcept
523: {
524: PetscFunctionBegin;
526: PetscAssertPointer(array, 2);
527: switch (access) {
528: case PETSC_MEMORY_ACCESS_READ:
529: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C(), (Mat, PetscScalar **), (A, array));
530: break;
531: case PETSC_MEMORY_ACCESS_WRITE:
532: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C(), (Mat, PetscScalar **), (A, array));
533: break;
534: case PETSC_MEMORY_ACCESS_READ_WRITE:
535: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C(), (Mat, PetscScalar **), (A, array));
536: break;
537: }
538: if (PetscMemoryAccessWrite(access)) PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
539: PetscFunctionReturn(PETSC_SUCCESS);
540: }
542: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
543: inline PetscErrorCode MatDenseCUPMRestoreArray_Private(Mat A, PetscScalar **array) noexcept
544: {
545: PetscFunctionBegin;
547: if (array) PetscAssertPointer(array, 2);
548: switch (access) {
549: case PETSC_MEMORY_ACCESS_READ:
550: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C(), (Mat, PetscScalar **), (A, array));
551: break;
552: case PETSC_MEMORY_ACCESS_WRITE:
553: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C(), (Mat, PetscScalar **), (A, array));
554: break;
555: case PETSC_MEMORY_ACCESS_READ_WRITE:
556: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C(), (Mat, PetscScalar **), (A, array));
557: break;
558: }
559: if (PetscMemoryAccessWrite(access)) {
560: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
561: A->offloadmask = PETSC_OFFLOAD_GPU;
562: }
563: if (array) *array = nullptr;
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: template <device::cupm::DeviceType T>
568: inline PetscErrorCode MatDenseCUPMGetArray(Mat A, PetscScalar **array) noexcept
569: {
570: PetscFunctionBegin;
571: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: template <device::cupm::DeviceType T>
576: inline PetscErrorCode MatDenseCUPMGetArrayRead(Mat A, const PetscScalar **array) noexcept
577: {
578: PetscFunctionBegin;
579: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
580: PetscFunctionReturn(PETSC_SUCCESS);
581: }
583: template <device::cupm::DeviceType T>
584: inline PetscErrorCode MatDenseCUPMGetArrayWrite(Mat A, PetscScalar **array) noexcept
585: {
586: PetscFunctionBegin;
587: PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
588: PetscFunctionReturn(PETSC_SUCCESS);
589: }
591: template <device::cupm::DeviceType T>
592: inline PetscErrorCode MatDenseCUPMRestoreArray(Mat A, PetscScalar **array) noexcept
593: {
594: PetscFunctionBegin;
595: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
596: PetscFunctionReturn(PETSC_SUCCESS);
597: }
599: template <device::cupm::DeviceType T>
600: inline PetscErrorCode MatDenseCUPMRestoreArrayRead(Mat A, const PetscScalar **array) noexcept
601: {
602: PetscFunctionBegin;
603: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
604: PetscFunctionReturn(PETSC_SUCCESS);
605: }
607: template <device::cupm::DeviceType T>
608: inline PetscErrorCode MatDenseCUPMRestoreArrayWrite(Mat A, PetscScalar **array) noexcept
609: {
610: PetscFunctionBegin;
611: PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
612: PetscFunctionReturn(PETSC_SUCCESS);
613: }
615: template <device::cupm::DeviceType T>
616: inline PetscErrorCode MatDenseCUPMPlaceArray(Mat A, const PetscScalar *array) noexcept
617: {
618: PetscFunctionBegin;
620: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C(), (Mat, const PetscScalar *), (A, array));
621: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
622: A->offloadmask = PETSC_OFFLOAD_GPU;
623: PetscFunctionReturn(PETSC_SUCCESS);
624: }
626: template <device::cupm::DeviceType T>
627: inline PetscErrorCode MatDenseCUPMReplaceArray(Mat A, const PetscScalar *array) noexcept
628: {
629: PetscFunctionBegin;
631: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C(), (Mat, const PetscScalar *), (A, array));
632: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
633: A->offloadmask = PETSC_OFFLOAD_GPU;
634: PetscFunctionReturn(PETSC_SUCCESS);
635: }
637: template <device::cupm::DeviceType T>
638: inline PetscErrorCode MatDenseCUPMResetArray(Mat A) noexcept
639: {
640: PetscFunctionBegin;
642: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C(), (Mat), (A));
643: PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
644: PetscFunctionReturn(PETSC_SUCCESS);
645: }
647: template <device::cupm::DeviceType T>
648: inline PetscErrorCode MatDenseCUPMSetPreallocation(Mat A, PetscScalar *device_data, PetscDeviceContext dctx = nullptr) noexcept
649: {
650: PetscFunctionBegin;
652: PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMSetPreallocation_C(), (Mat, PetscDeviceContext, PetscScalar *), (A, dctx, device_data));
653: PetscFunctionReturn(PETSC_SUCCESS);
654: }
656: } // namespace cupm
658: } // namespace mat
660: } // namespace Petsc
662: #endif // __cplusplus