Actual source code: syclcontext.sycl.cxx
1: #include "sycldevice.hpp"
2: #include <sycl/sycl.hpp>
3: #include <chrono>
5: namespace Petsc
6: {
8: namespace device
9: {
11: namespace sycl
12: {
14: namespace impl
15: {
17: class DeviceContext {
18: public:
19: struct PetscDeviceContext_SYCL {
20: ::sycl::event event;
21: ::sycl::event begin; // timer-only
22: ::sycl::event end; // timer-only
23: #if PetscDefined(USE_DEBUG)
24: PetscBool timerInUse{};
25: #endif
26: ::sycl::queue queue;
28: std::chrono::time_point<std::chrono::steady_clock> timeBegin{};
29: };
31: private:
32: static bool initialized_;
34: static PetscErrorCode finalize_() noexcept
35: {
36: PetscFunctionBegin;
37: initialized_ = false;
38: PetscFunctionReturn(PETSC_SUCCESS);
39: }
41: static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept
42: {
43: PetscFunctionBegin;
44: PetscCall(PetscDeviceCheckDeviceCount_Internal(id));
45: if (!initialized_) {
46: initialized_ = true;
47: PetscCall(PetscRegisterFinalize(finalize_));
48: }
49: PetscFunctionReturn(PETSC_SUCCESS);
50: }
52: public:
53: const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
55: // default constructor
56: DeviceContext() noexcept = default;
58: // All of these functions MUST be static in order to be callable from C, otherwise they
59: // get the implicit 'this' pointer tacked on
60: static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
61: {
62: PetscFunctionBegin;
63: delete static_cast<PetscDeviceContext_SYCL *>(dctx->data);
64: dctx->data = nullptr;
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
68: static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept
69: {
70: PetscDevice dev;
71: PetscInt id;
73: PetscFunctionBegin;
74: #if PetscDefined(USE_DEBUG)
75: static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE;
76: #endif
77: PetscCall(PetscDeviceContextGetDevice(dctx, &dev));
78: PetscCall(PetscDeviceGetDeviceId(dev, &id));
79: const ::sycl::device &syclDevice = (id == PETSC_SYCL_DEVICE_HOST) ? ::sycl::device(::sycl::cpu_selector_v) : ::sycl::device::get_devices(::sycl::info::device_type::gpu)[id];
81: static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = ::sycl::queue(syclDevice, ::sycl::property::queue::in_order());
82: PetscFunctionReturn(PETSC_SUCCESS);
83: }
85: static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept
86: {
87: PetscFunctionBegin;
88: // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc
89: // *idle = static_cast(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE;
90: *idle = PETSC_FALSE;
91: PetscFunctionReturn(PETSC_SUCCESS);
92: }
94: static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept
95: {
96: PetscBool idle = PETSC_TRUE;
97: const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
99: PetscFunctionBegin;
100: PetscCall(query(dctx, &idle));
101: if (!idle) PetscCallCXX(dci->queue.wait());
102: PetscFunctionReturn(PETSC_SUCCESS);
103: }
105: static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept
106: {
107: PetscFunctionBegin;
108: *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue);
109: PetscFunctionReturn(PETSC_SUCCESS);
110: }
112: static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept
113: {
114: const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
116: PetscFunctionBegin;
117: #if PetscDefined(USE_DEBUG)
118: PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?");
119: dci->timerInUse = PETSC_TRUE;
120: #endif
121: // It is not a good approach to time SYCL kernels because the timer starts at the kernel launch time at host,
122: // not at the start of execution time on device. SYCL provides this style of kernel timing:
123: /*
124: sycl::queue q(sycl::default_selector_v, sycl::property::queue::enable_profiling{});
125: sycl::event e = q.submit([&](sycl::handler &h) {
126: ...
127: });
128: e.wait();
129: auto start_time = e.get_profiling_info<sycl::info::event_profiling::command_start>();
130: auto end_time = e.get_profiling_info<sycl::info::event_profiling::command_end>();
131: long long kernel_duration_ns = end_time - start_time;
132: */
133: // It requires 1) enable profiling at the queue's creation time, and 2) store the event returned by kernel launch.
134: // But neither we have control of the input queue, nor does PetscDeviceContext support 2), so we just use a
135: // host side timer.
136: PetscCallCXX(dci->timeBegin = std::chrono::steady_clock::now());
137: PetscFunctionReturn(PETSC_SUCCESS);
138: }
140: static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept
141: {
142: const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
143: std::chrono::duration<double> duration;
145: PetscFunctionBegin;
146: #if PetscDefined(USE_DEBUG)
147: PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?");
148: dci->timerInUse = PETSC_FALSE;
149: #endif
150: PetscCallCXX(dci->queue.wait());
151: PetscCallCXX(duration = std::chrono::steady_clock::now() - dci->timeBegin);
152: PetscCallCXX(*elapsed = duration.count());
153: PetscFunctionReturn(PETSC_SUCCESS);
154: }
156: static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
157: static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
158: static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
159: static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
160: };
162: } // namespace impl
164: } // namespace sycl
166: } // namespace device
168: } // namespace Petsc
170: PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx)
171: {
172: using namespace Petsc::device::sycl::impl;
174: static const DeviceContext syclctx;
176: PetscFunctionBegin;
177: PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL());
178: dctx->ops[0] = syclctx.ops;
179: PetscFunctionReturn(PETSC_SUCCESS);
180: }