Actual source code: dcontext.cxx
1: #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/cpp/object_pool.hpp>
5: #include <petsc/private/cpp/utility.hpp>
6: #include <petsc/private/cpp/array.hpp>
8: #include <vector>
9: #include <string> // std::to_string among other things
11: /* Define the allocator */
12: class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13: public:
14: PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15: {
16: PetscFunctionBegin;
17: PetscCall(PetscArrayzero(dctx, 1));
18: PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFn *)PetscDeviceContextDestroy, (PetscObjectViewFn *)PetscDeviceContextView));
19: PetscCall(PetscLogObjectCreate((PetscObject)dctx));
21: PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx});
22: PetscCall(underlying().reset(dctx, false));
23: PetscFunctionReturn(PETSC_SUCCESS);
24: }
26: static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
27: {
28: PetscFunctionBegin;
29: PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
30: PetscTryTypeMethod(dctx, destroy);
31: PetscCall(PetscDeviceDestroy(&dctx->device));
32: PetscCall(PetscFree(dctx->childIDs));
33: delete CxxDataCast(dctx);
34: PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
38: static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
39: {
40: PetscFunctionBegin;
41: if (zero) {
42: // reset the device if the user set it
43: if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
44: PetscTryTypeMethod(dctx, destroy);
45: PetscCall(PetscDeviceDestroy(&dctx->device));
46: PetscCall(PetscArrayzero(dctx->ops, 1));
47: dctx->data = nullptr;
48: }
49: PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
50: dctx->numChildren = 0;
51: dctx->setup = PETSC_FALSE;
52: // don't deallocate the child array, rather just zero it out
53: PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
54: PetscCall(CxxDataCast(dctx)->clear());
55: PetscCall(CxxDataCast(dctx)->reset_self(dctx));
56: }
57: dctx->streamType = PETSC_STREAM_DEFAULT;
58: PetscFunctionReturn(PETSC_SUCCESS);
59: }
61: static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept
62: {
63: PetscFunctionBegin;
64: PetscCall(CxxDataCast(dctx)->reset_self(dctx));
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
67: };
69: static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
71: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
72: /*@C
73: PetscDeviceContextCreate - Creates a `PetscDeviceContext`
75: Not Collective
77: Output Parameter:
78: . dctx - The `PetscDeviceContext`
80: Level: beginner
82: Note:
83: Unlike almost every other PETSc class it is advised that most users use
84: `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
85: different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
86: ensures compatible types.
88: DAG representation:
89: .vb
90: time ->
92: |= CALL =| - dctx ->
93: .ve
95: .N ASYNC_API
97: .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
98: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
99: `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
100: @*/
101: PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
102: {
103: PetscFunctionBegin;
104: PetscAssertPointer(dctx, 1);
105: PetscCall(PetscDeviceInitializePackage());
106: PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
107: PetscCall(contextPool.allocate(dctx));
108: PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
109: PetscFunctionReturn(PETSC_SUCCESS);
110: }
112: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
113: /*@C
114: PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
116: Not Collective
118: Input Parameter:
119: . dctx - The `PetscDeviceContext`
121: Level: beginner
123: Notes:
124: No implicit synchronization occurs due to this routine, all resources are released completely
125: asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
126: `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
127: calling this routine.
129: DAG representation:
130: .vb
131: time ->
133: -> dctx - |= CALL =|
134: .ve
136: Developer Notes:
137: `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
138: growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
139: this should perhaps be implemented.
141: .N ASYNC_API
143: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
144: `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
145: @*/
146: PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
147: {
148: PetscFunctionBegin;
149: PetscAssertPointer(dctx, 1);
150: if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
151: PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
152: if (--(PetscObjectCast(*dctx)->refct) <= 0) {
153: PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
154: PetscCall(contextPool.deallocate(dctx));
155: }
156: PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
157: *dctx = nullptr;
158: PetscFunctionReturn(PETSC_SUCCESS);
159: }
161: /*@C
162: PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
163: `PetscDeviceContext`
165: Not Collective
167: Input Parameters:
168: + dctx - The `PetscDeviceContext`
169: - type - The `PetscStreamType`
171: Level: beginner
173: Note:
174: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
175: types and their interactions. If the `PetscDeviceContext` was previously set up and stream
176: type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
178: .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
179: `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
180: @*/
181: PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
182: {
183: PetscFunctionBegin;
184: // do not use getoptionalnullcontext here since we do not want the user to change the stream
185: // type
188: // only need to do complex swapping if the object has already been setup
189: if (dctx->setup && (dctx->streamType != type)) {
190: dctx->setup = PETSC_FALSE;
191: PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
192: PetscUseTypeMethod(dctx, changestreamtype, type);
193: PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
194: }
195: dctx->streamType = type;
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: /*@C
200: PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
201: `PetscDeviceContext`
203: Not Collective
205: Input Parameter:
206: . dctx - The `PetscDeviceContext`
208: Output Parameter:
209: . type - The `PetscStreamType`
211: Level: beginner
213: Note:
214: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
215: types and their interactions
217: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
218: `PetscDeviceContextSetFromOptions()`
219: @*/
220: PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
221: {
222: PetscFunctionBegin;
223: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
224: PetscAssertPointer(type, 2);
225: *type = dctx->streamType;
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: /*
230: Actual function to set the device.
232: 1. Repeatedly destroying and recreating internal data structures (like streams and events)
233: for recycled PetscDeviceContexts is not free. If done often, it does add up.
234: 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
235: default contexts. The default contexts *never* change type, and the children are extremely
236: unlikely to (chances are if you fork once, you will fork again very soon).
237: 3. The only time this calculus changes is if the user themselves sets the device type. In
238: this case we do not know what the user has changed, so must always wipe the slate clean.
240: Thus we need to keep track whether the user explicitly sets the device contexts device.
241: */
242: static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
243: {
244: PetscFunctionBegin;
245: // do not use getoptionalnullcontext here since we do not want the user to change its device
248: if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
249: PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250: PetscTryTypeMethod(dctx, destroy);
251: PetscCall(PetscDeviceDestroy(&dctx->device));
252: PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
253: PetscCall(PetscDeviceReference_Internal(device));
254: // set it before calling the method
255: dctx->device = device;
256: PetscCall((*device->ops->createcontext)(dctx));
257: PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
258: dctx->setup = PETSC_FALSE;
259: dctx->usersetdevice = user_set;
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
264: {
265: PetscDevice device;
267: PetscFunctionBegin;
268: PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
269: PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
270: PetscFunctionReturn(PETSC_SUCCESS);
271: }
273: /*@C
274: PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
276: Not Collective
278: Input Parameters:
279: + dctx - The `PetscDeviceContext`
280: - device - The `PetscDevice`
282: Level: intermediate
284: Notes:
285: This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
286: also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
287: necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
288: always come equipped with the "default" device.
290: This routine is a no-op if `device` is already attached to `dctx`.
292: This routine may (but is very unlikely to) initialize the backend device and may incur
293: synchronization.
295: .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
296: `PetscDeviceContextGetDeviceType()`
297: @*/
298: PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
299: {
300: PetscFunctionBegin;
301: PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
302: PetscFunctionReturn(PETSC_SUCCESS);
303: }
305: /*@C
306: PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
308: Not Collective
310: Input Parameter:
311: . dctx - the `PetscDeviceContext`
313: Output Parameter:
314: . device - The `PetscDevice`
316: Level: intermediate
318: Note:
319: This is a borrowed reference, the user should not destroy `device`.
321: .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
322: @*/
323: PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
324: {
325: PetscFunctionBegin;
326: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
327: PetscAssertPointer(device, 2);
328: PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
329: *device = dctx->device;
330: PetscFunctionReturn(PETSC_SUCCESS);
331: }
333: /*@C
334: PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
336: Not Collective
338: Input Parameter:
339: . dctx - The `PetscDeviceContext`
341: Output Parameter:
342: . type - The `PetscDeviceType`
344: Level: beginner
346: Note:
347: This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
348: `PetscDeviceGetType()`.
350: .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
351: @*/
352: PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
353: {
354: PetscDevice device = nullptr;
356: PetscFunctionBegin;
357: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
358: PetscAssertPointer(type, 2);
359: PetscCall(PetscDeviceContextGetDevice(dctx, &device));
360: PetscCall(PetscDeviceGetType(device, type));
361: PetscFunctionReturn(PETSC_SUCCESS);
362: }
364: /*@C
365: PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
367: Not Collective
369: Input Parameter:
370: . dctx - The `PetscDeviceContext`
372: Level: beginner
374: Developer Notes:
375: This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
376: structures such as streams, events, and (possibly) handles.
378: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
379: `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
380: @*/
381: PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
382: {
383: PetscFunctionBegin;
384: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
385: if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
386: if (!dctx->device) {
387: const auto default_dtype = PETSC_DEVICE_DEFAULT();
389: PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
390: PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
391: }
392: PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
393: PetscUseTypeMethod(dctx, setup);
394: PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
395: dctx->setup = PETSC_TRUE;
396: PetscFunctionReturn(PETSC_SUCCESS);
397: }
399: static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
400: {
401: PetscFunctionBegin;
402: PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
403: PetscCall(PetscDeviceContextCreate(dctxdup));
404: PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
405: if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
406: PetscCall(PetscDeviceContextSetUp(*dctxdup));
407: PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
408: PetscFunctionReturn(PETSC_SUCCESS);
409: }
411: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
412: /*@C
413: PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
415: Not Collective
417: Input Parameter:
418: . dctx - The `PetscDeviceContext` to duplicate
420: Output Parameter:
421: . dctxdup - The duplicated `PetscDeviceContext`
423: Level: beginner
425: Notes:
426: This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
427: another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
428: (including its current stream-state) they are completely separate objects.
430: There is no implied ordering between `dctx` or `dctxdup`.
432: DAG representation:
433: .vb
434: time ->
436: -> dctx - |= CALL =| - dctx ---->
437: - dctxdup ->
438: .ve
440: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
441: `PetscDeviceContextSetStreamType()`
442: @*/
443: PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
444: {
445: auto stype = PETSC_STREAM_DEFAULT;
447: PetscFunctionBegin;
448: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
449: PetscAssertPointer(dctxdup, 2);
450: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
451: PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
452: PetscFunctionReturn(PETSC_SUCCESS);
453: }
455: /*@C
456: PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
458: Not Collective
460: Input Parameter:
461: . dctx - The `PetscDeviceContext`
463: Output Parameter:
464: . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
466: Level: intermediate
468: Note:
469: This routine only refers a singular context and does NOT take any of its children into
470: account. That is, if `dctx` is idle but has dependents who do have work this routine still
471: returns `PETSC_TRUE`.
473: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
474: @*/
475: PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
476: {
477: PetscFunctionBegin;
478: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
479: PetscAssertPointer(idle, 2);
480: PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
481: PetscUseTypeMethod(dctx, query, idle);
482: PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
483: PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
484: PetscFunctionReturn(PETSC_SUCCESS);
485: }
487: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
488: /*@C
489: PetscDeviceContextWaitForContext - Make one context wait for another context to finish
491: Not Collective
493: Input Parameters:
494: + dctxa - The `PetscDeviceContext` object that is waiting
495: - dctxb - The `PetscDeviceContext` object that is being waited on
497: Level: beginner
499: Notes:
500: Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
501: does not wait for the serialization to actually occur.
503: This routine uses only the state of `dctxb` at the moment this routine was called, so any
504: future work queued will not affect `dctxa`. It is safe to pass the same context to both
505: arguments (in which case this routine does nothing).
507: DAG representation:
508: .vb
509: time ->
511: -> dctxa ---/- |= CALL =| - dctxa ->
512: /
513: -> dctxb -/------------------------>
514: .ve
516: .N ASYNC_API
518: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
519: @*/
520: PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
521: {
522: PetscObjectId bid;
524: PetscFunctionBegin;
525: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
526: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
527: PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
528: if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
529: bid = PetscObjectCast(dctxb)->id;
530: PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
531: PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
532: PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot());
533: PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
534: PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid));
535: PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa)));
536: PetscFunctionReturn(PETSC_SUCCESS);
537: }
539: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
540: /*@C
541: PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
542: context with a prescribed `PetscStreamType`
544: Not Collective, Asynchronous
546: Input Parameters:
547: + dctx - The parent `PetscDeviceContext`
548: . stype - The prescribed `PetscStreamType`
549: - n - The number of children to create
551: Output Parameter:
552: . dsub - The created child context(s)
554: Level: intermediate
556: Notes:
557: This routine creates `n` edges of a DAG from a source node which are causally dependent on the
558: source node. This causal dependency is established as-if by calling
559: `PetscDeviceContextWaitForContext()` on every child.
561: `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
562: expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
564: This routine only accounts for work queued on `dctx` up until calling this routine, any
565: subsequent work enqueued on `dctx` has no effect on `dsub`.
567: The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
568: that they are different. This is useful in cases where a routine can locally exploit stream
569: parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
570: carries.
572: DAG representation:
573: .vb
574: time ->
576: -> dctx - |= CALL =| -\----> dctx ------>
577: \---> dsub[0] --->
578: \--> ... ------->
579: \-> dsub[n-1] ->
580: .ve
582: .N ASYNC_API
584: .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
585: `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
586: @*/
587: PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
588: {
589: // debugging only
590: std::string idList;
591: auto ninput = n;
593: PetscFunctionBegin;
594: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
595: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
596: PetscAssertPointer(dsub, 4);
597: *dsub = nullptr;
598: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
599: if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
600: PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
601: /* update child totals */
602: dctx->numChildren += n;
603: /* now to find out if we have room */
604: if (dctx->numChildren > dctx->maxNumChildren) {
605: const auto numChildren = dctx->numChildren;
606: auto &maxNumChildren = dctx->maxNumChildren;
607: auto numAllocated = numChildren;
609: /* no room, either from having too many kids or not having any */
610: if (auto &childIDs = dctx->childIDs) {
611: // the difference is backwards because we have not updated maxNumChildren yet
612: numAllocated -= maxNumChildren;
613: /* have existing children, must reallocate them */
614: PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
615: /* clear the extra memory since realloc doesn't do it for us */
616: PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
617: } else {
618: /* have no children */
619: PetscCall(PetscCalloc1(numChildren, &childIDs));
620: }
621: /* update total number of children */
622: maxNumChildren = numChildren;
623: }
624: PetscCall(PetscMalloc1(n, dsub));
625: for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
626: auto &childID = dctx->childIDs[i];
627: /* empty child slot */
628: if (!childID) {
629: auto &childctx = (*dsub)[i];
631: /* create the child context in the image of its parent */
632: PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
633: PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
634: /* register the child with its parent */
635: PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
636: if (PetscDefined(USE_DEBUG_AND_INFO)) {
637: PetscCallCXX(idList += std::to_string(childID));
638: if (ninput != 1) PetscCallCXX(idList += ", ");
639: }
640: --ninput;
641: }
642: }
643: PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
644: PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
645: PetscFunctionReturn(PETSC_SUCCESS);
646: }
648: /*@C
649: PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
651: Not Collective, Asynchronous
653: Input Parameters:
654: + dctx - The parent `PetscDeviceContext`
655: - n - The number of children to create
657: Output Parameter:
658: . dsub - The created child context(s)
660: Level: beginner
662: Notes:
663: Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
664: `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
666: .vb
667: PetscStreamType stype;
669: PetscDeviceContextGetStreamType(dctx, &stype);
670: PetscDeviceContextForkWithStreamType(dctx, stype, ...);
671: .ve
673: .N ASYNC_API
675: .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
676: `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
677: @*/
678: PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
679: {
680: auto stype = PETSC_STREAM_DEFAULT;
682: PetscFunctionBegin;
683: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
684: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
685: PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
686: PetscFunctionReturn(PETSC_SUCCESS);
687: }
689: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
690: /*@C
691: PetscDeviceContextJoin - Converge a set of child contexts
693: Not Collective, Asynchronous
695: Input Parameters:
696: + dctx - A `PetscDeviceContext` to converge on
697: . n - The number of sub contexts to converge
698: . joinMode - The type of join to perform
699: - dsub - The sub contexts to converge
701: Level: beginner
703: Notes:
704: If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
705: node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
706: which receives `n` edges (and optionally destroys them) which is dependent on the completion
707: of all incoming edges.
709: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
710: destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
711: passed to this routine.
713: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
714: `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
715: sync suffix does NOT refer to the host, i.e. this routine does NOT call
716: `PetscDeviceSynchronize()`.
718: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
719: the sub contexts do not wait for one another or `dctx` afterwards.
721: DAG representations:
722: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
723: .vb
724: time ->
726: -> dctx ---------/- |= CALL =| - dctx ->
727: -> dsub[0] -----/
728: -> ... -------/
729: -> dsub[n-1] -/
730: .ve
731: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
732: .vb
733: time ->
735: -> dctx ---------/- |= CALL =| -\----> dctx ------>
736: -> dsub[0] -----/ \---> dsub[0] --->
737: -> ... -------/ \--> ... ------->
738: -> dsub[n-1] -/ \-> dsub[n-1] ->
739: .ve
740: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
741: .vb
742: time ->
744: -> dctx ----------/- |= CALL =| - dctx ->
745: -> dsub[0] ------/----------------------->
746: -> ... --------/------------------------>
747: -> dsub[n-1] --/------------------------->
748: .ve
750: .N ASYNC_API
752: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
753: `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
754: @*/
755: PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
756: {
757: // debugging only
758: std::string idList;
760: PetscFunctionBegin;
761: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
762: /* validity of dctx is checked in the wait-for loop */
763: PetscAssertPointer(dsub, 4);
764: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
765: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
766: if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
767: /* first dctx waits on all the incoming edges */
768: PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
769: for (PetscInt i = 0; i < n; ++i) {
770: PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
771: PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
772: if (PetscDefined(USE_DEBUG_AND_INFO)) {
773: PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
774: if (i + 1 < n) PetscCallCXX(idList += ", ");
775: }
776: }
778: /* now we handle the aftermath */
779: switch (joinMode) {
780: case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
781: const auto children = dctx->childIDs;
782: const auto maxchild = dctx->maxNumChildren;
783: auto &nchild = dctx->numChildren;
784: PetscInt j = 0;
786: PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
787: /* update child count while it's still fresh in memory */
788: nchild -= n;
789: for (PetscInt i = 0; i < maxchild; ++i) {
790: if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
791: /* child is one of ours, can destroy it */
792: PetscCall(PetscDeviceContextDestroy((*dsub) + j));
793: /* reset the child slot */
794: children[i] = 0;
795: if (++j == n) break;
796: }
797: }
798: /* gone through the loop but did not find every child */
799: PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out in", n - j);
800: PetscCall(PetscFree(*dsub));
801: } break;
802: case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
803: for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
804: case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
805: break;
806: default:
807: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
808: }
809: PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
811: PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
812: PetscFunctionReturn(PETSC_SUCCESS);
813: }
815: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
816: /*@C
817: PetscDeviceContextSynchronize - Block the host until all work queued on a
818: `PetscDeviceContext` has finished
820: Not Collective
822: Input Parameter:
823: . dctx - The `PetscDeviceContext` to synchronize
825: Level: beginner
827: Notes:
828: The host will not return from this routine until `dctx` is idle. Any and all memory
829: operations queued on or otherwise associated with (either explicitly or implicitly via
830: dependencies) are guaranteed to have finished and be globally visible on return.
832: In effect, this routine serves as memory and execution barrier.
834: DAG representation:
835: .vb
836: time ->
838: -> dctx - |= CALL =| - dctx ->
839: .ve
841: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
842: @*/
843: PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
844: {
845: PetscFunctionBegin;
846: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
847: PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
848: /* if it isn't setup there is nothing to sync on */
849: if (dctx->setup) {
850: PetscUseTypeMethod(dctx, synchronize);
851: PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
852: }
853: PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
854: PetscFunctionReturn(PETSC_SUCCESS);
855: }
857: /* every device type has a vector of null PetscDeviceContexts -- one for each device */
858: static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
859: static auto nullContextsFinalizer = false;
861: static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
862: {
863: PetscInt devid;
864: PetscDeviceType dtype;
866: PetscFunctionBegin;
868: PetscAssertPointer(dctx, 3);
869: if (PetscUnlikely(!nullContextsFinalizer)) {
870: nullContextsFinalizer = true;
871: PetscCall(PetscRegisterFinalize([] {
872: PetscFunctionBegin;
873: for (auto &&dvec : nullContexts) {
874: for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
875: PetscCallCXX(dvec.clear());
876: }
877: nullContextsFinalizer = false;
878: PetscFunctionReturn(PETSC_SUCCESS);
879: }));
880: }
881: PetscCall(PetscDeviceGetDeviceId(device, &devid));
882: PetscCall(PetscDeviceGetType(device, &dtype));
883: {
884: auto &ctxlist = nullContexts[dtype];
886: PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
887: // need to resize the container if not big enough because incrementing the iterator in
888: // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
889: // current size of the container.
890: if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
891: if (PetscUnlikely(!ctxlist[devid])) {
892: // we have not seen this device before
893: PetscCall(PetscDeviceContextCreate(dctx));
894: PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
895: {
896: const auto pobj = PetscObjectCast(*dctx);
897: const auto name = "null context " + std::to_string(devid);
898: const auto prefix = "null_context_" + std::to_string(devid) + '_';
900: PetscCall(PetscObjectSetName(pobj, name.c_str()));
901: PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
902: }
903: PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_DEFAULT));
904: PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
905: PetscCall(PetscDeviceContextSetUp(*dctx));
906: // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
907: PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
908: } else *dctx = ctxlist[devid];
909: }
910: PetscFunctionReturn(PETSC_SUCCESS);
911: }
913: /*
914: Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
915: guaranteed to always be globally blocking.
916: */
917: PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
918: {
919: PetscDeviceContext gctx;
920: PetscDevice gdev = nullptr;
922: PetscFunctionBegin;
923: PetscAssertPointer(dctx, 1);
924: PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
925: PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
926: PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
927: PetscFunctionReturn(PETSC_SUCCESS);
928: }
930: /*@C
931: PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
933: Collective on `comm` or `dctx`
935: Input Parameters:
936: + comm - MPI communicator on which to query the options database (optional)
937: - dctx - The `PetscDeviceContext` to configure
939: Output Parameter:
940: . dctx - The `PetscDeviceContext`
942: Options Database Keys:
943: + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
944: `PetscDeviceContextSetStreamType()`
945: - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
947: Level: beginner
949: Note:
950: The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
951: used (which is always `PETSC_COMM_SELF`).
953: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
954: `PetscDeviceContextView()`
955: @*/
956: PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
957: {
958: const auto pobj = PetscObjectCast(dctx);
959: auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
960: auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
961: MPI_Comm old_comm = PETSC_COMM_SELF;
963: PetscFunctionBegin;
964: // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
966: /* set the device type first */
967: if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
968: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
970: if (comm == MPI_COMM_NULL) {
971: PetscCall(PetscObjectGetComm(pobj, &comm));
972: } else {
973: // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
974: // PetscObjectOptionsBegin() behaves as if dctx had comm
975: old_comm = Petsc::util::exchange(pobj->comm, comm);
976: }
978: PetscObjectOptionsBegin(pobj);
979: PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
980: PetscOptionsEnd();
981: // reset the comm (should be PETSC_COMM_SELF)
982: if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
983: if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
984: if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
985: PetscCall(PetscDeviceContextSetUp(dctx));
986: PetscFunctionReturn(PETSC_SUCCESS);
987: }
989: /*@
990: PetscDeviceContextView - View a `PetscDeviceContext`
992: Collective on `viewer`
994: Input Parameters:
995: + dctx - The `PetscDeviceContext`
996: - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
998: Level: beginner
1000: Note:
1001: If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
1002: routine is collective on `PETSC_COMM_WORLD`.
1004: .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
1005: @*/
1006: PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1007: {
1008: PetscBool iascii;
1010: PetscFunctionBegin;
1011: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1012: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1014: PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1015: if (iascii) {
1016: auto stype = PETSC_STREAM_DEFAULT;
1017: PetscViewer sub;
1019: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1020: PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1021: PetscCall(PetscViewerASCIIPushTab(sub));
1022: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1023: PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1024: PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1025: if (const auto nchild = dctx->numChildren) {
1026: PetscCall(PetscViewerASCIIPushTab(sub));
1027: for (PetscInt i = 0; i < nchild; ++i) {
1028: if (i == nchild - 1) {
1029: PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1030: } else {
1031: PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1032: }
1033: }
1034: }
1035: PetscCall(PetscViewerASCIIPopTab(sub));
1036: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1037: PetscCall(PetscViewerASCIIPushTab(viewer));
1038: }
1039: if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1040: if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1041: PetscFunctionReturn(PETSC_SUCCESS);
1042: }
1044: /*@
1045: PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1047: Input Parameters:
1048: + dctx - The `PetscDeviceContext` to view
1049: . obj - Optional `PetscObject` to associate (may be `NULL`)
1050: - name - The command line option
1052: Level: beginner
1054: .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1055: @*/
1056: PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1057: {
1058: PetscFunctionBegin;
1059: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1061: PetscAssertPointer(name, 3);
1062: PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1063: PetscFunctionReturn(PETSC_SUCCESS);
1064: }
1066: /*@C
1067: PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1069: Input Parameter:
1070: . dctx - The `PetscDeviceContext` to get the stream from
1072: Output Parameter:
1073: . handle - A pointer to the handle to the stream
1075: Level: developer
1077: Note:
1078: This routine is dangerous. It exists only for the most experienced users and
1079: internal PETSc development.
1081: There is no way for PETSc's auto-dependency system to track what the caller does with the
1082: stream.
1084: If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1085: kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1086: their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1087: race condition. This race condition may manifest in nondeterministic ways.
1089: Alternatively, the user may synchronize the stream immediately before and after use. This is
1090: the safest option.
1092: Example Usage:
1093: .vb
1094: PetscDeviceContext dctx;
1095: PetscDeviceType type;
1096: void *handle;
1098: PetscDeviceContextGetCurrentContext(&dctx);
1099: PetscDeviceContextGetStreamHandle(dctx, &handle);
1100: PetscDeviceContextGetDeviceType(dctx, &type);
1102: if (type == PETSC_DEVICE_CUDA) {
1103: cudaStream_t stream = *(cudaStream_t *)handle;
1105: my_cuda_kernel<<<1, 2, 3, stream>>>();
1106: }
1107: .ve
1108: Alternatively, if type of `PetscDeviceContext` is known (for example `PETSC_DEVICE_HIP`), the
1109: user may pass in a pointer to stream handle directly\:
1110: .vb
1111: hipStream_t *stream;
1113: // note the cast to void **
1114: PetscDeviceContextGetStreamHandle(dctx, (void **)&stream);
1115: // note the dereference
1116: my_hip_kernel<<<1, 2, 3, *stream>>>();
1117: .ve
1119: .N ASYNC_API
1121: .seealso: `PetscDeviceContext`
1122: @*/
1123: PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void **handle)
1124: {
1125: PetscFunctionBegin;
1126: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1127: PetscAssertPointer(handle, 2);
1128: PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, handle));
1129: PetscFunctionReturn(PETSC_SUCCESS);
1130: }