Actual source code: petscdevicetypes.h
1: #pragma once
3: #include <petscsys.h>
5: // Some overzealous older gcc versions warn that the comparisons below are always true. Neat
6: // that it can detect this, but the tautology *is* the point of the static_assert()!
7: #if defined(__GNUC__) && __GNUC__ >= 6 && !PetscDefined(HAVE_WINDOWS_COMPILERS)
8: #define PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING 1
9: #else
10: #define PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING 0
11: #endif
13: /* SUBMANSEC = Sys */
15: /*E
16: PetscMemType - Memory type of a pointer
18: Level: intermediate
20: Note:
21: `PETSC_MEMTYPE_KOKKOS` depends on the Kokkos backend configuration
23: Developer Notes:
24: This enum uses a function (`PetscMemTypeToString()`) to convert to string representation so
25: cannot be used in `PetscOptionsEnum()`.
27: Encoding of the bitmask in binary\: xxxxyyyz
28: .vb
29: z = 0 - Host memory
30: z = 1 - Device memory
31: yyy = 000 - CUDA-related memory
32: yyy = 001 - HIP-related memory
33: yyy = 010 - SYCL-related memory
34: xxxxyyy1 = 0000,0001 - CUDA memory
35: xxxxyyy1 = 0001,0001 - CUDA NVSHMEM memory
36: xxxxyyy1 = 0000,0011 - HIP memory
37: xxxxyyy1 = 0000,0101 - SYCL memory
38: .ve
40: Other types of memory, e.g., CUDA managed memory, can be added when needed.
42: .seealso: `PetscMemTypeToString()`, `VecGetArrayAndMemType()`,
43: `PetscSFBcastWithMemTypeBegin()`, `PetscSFReduceWithMemTypeBegin()`
44: E*/
45: typedef enum {
46: PETSC_MEMTYPE_HOST = 0,
47: PETSC_MEMTYPE_DEVICE = 1, /* 0x01 */
48: PETSC_MEMTYPE_CUDA = 1, /* 0x01 */
49: PETSC_MEMTYPE_NVSHMEM = 17, /* 0x11 */
50: PETSC_MEMTYPE_HIP = 3, /* 0x03 */
51: PETSC_MEMTYPE_SYCL = 5 /* 0x05 */
52: } PetscMemType;
53: #if PetscDefined(HAVE_CUDA)
54: #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_CUDA
55: #elif PetscDefined(HAVE_HIP)
56: #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_HIP
57: #elif PetscDefined(HAVE_SYCL)
58: #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_SYCL
59: #else
60: #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_HOST
61: #endif
63: /*MC
64: PetscMemTypeHost - Returns `PETSC_TRUE` if a given `PetscMemType` refers to host (CPU) memory
66: Synopsis:
67: #include <petscdevicetypes.h>
68: PetscBool PetscMemTypeHost(PetscMemType m)
70: Not Collective; No Fortran Support
72: Input Parameter:
73: . m - the `PetscMemType` value
75: Level: intermediate
77: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
78: M*/
79: #define PetscMemTypeHost(m) ((((m) & 0x1) == PETSC_MEMTYPE_HOST) ? PETSC_TRUE : PETSC_FALSE)
81: /*MC
82: PetscMemTypeDevice - Returns `PETSC_TRUE` if a given `PetscMemType` refers to any kind of device (GPU) memory
84: Synopsis:
85: #include <petscdevicetypes.h>
86: PetscBool PetscMemTypeDevice(PetscMemType m)
88: Not Collective; No Fortran Support
90: Input Parameter:
91: . m - the `PetscMemType` value
93: Level: intermediate
95: .seealso: `PetscMemType`, `PetscMemTypeHost()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
96: M*/
97: #define PetscMemTypeDevice(m) ((((m) & 0x1) == PETSC_MEMTYPE_DEVICE) ? PETSC_TRUE : PETSC_FALSE)
99: /*MC
100: PetscMemTypeCUDA - Returns `PETSC_TRUE` if a given `PetscMemType` refers to CUDA device memory (including CUDA NVSHMEM memory)
102: Synopsis:
103: #include <petscdevicetypes.h>
104: PetscBool PetscMemTypeCUDA(PetscMemType m)
106: Not Collective; No Fortran Support
108: Input Parameter:
109: . m - the `PetscMemType` value
111: Level: intermediate
113: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
114: M*/
115: #define PetscMemTypeCUDA(m) ((((m) & 0xF) == PETSC_MEMTYPE_CUDA) ? PETSC_TRUE : PETSC_FALSE)
117: /*MC
118: PetscMemTypeHIP - Returns `PETSC_TRUE` if a given `PetscMemType` refers to HIP device memory
120: Synopsis:
121: #include <petscdevicetypes.h>
122: PetscBool PetscMemTypeHIP(PetscMemType m)
124: Not Collective; No Fortran Support
126: Input Parameter:
127: . m - the `PetscMemType` value
129: Level: intermediate
131: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
132: M*/
133: #define PetscMemTypeHIP(m) ((((m) & 0xF) == PETSC_MEMTYPE_HIP) ? PETSC_TRUE : PETSC_FALSE)
135: /*MC
136: PetscMemTypeSYCL - Returns `PETSC_TRUE` if a given `PetscMemType` refers to SYCL device memory
138: Synopsis:
139: #include <petscdevicetypes.h>
140: PetscBool PetscMemTypeSYCL(PetscMemType m)
142: Not Collective; No Fortran Support
144: Input Parameter:
145: . m - the `PetscMemType` value
147: Level: intermediate
149: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeNVSHMEM()`
150: M*/
151: #define PetscMemTypeSYCL(m) ((((m) & 0xF) == PETSC_MEMTYPE_SYCL) ? PETSC_TRUE : PETSC_FALSE)
153: /*MC
154: PetscMemTypeNVSHMEM - Returns `PETSC_TRUE` if a given `PetscMemType` refers to NVSHMEM memory
156: Synopsis:
157: #include <petscdevicetypes.h>
158: PetscBool PetscMemTypeNVSHMEM(PetscMemType m)
160: Not Collective; No Fortran Support
162: Input Parameter:
163: . m - the `PetscMemType` value
165: Level: intermediate
167: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`
168: M*/
169: #define PetscMemTypeNVSHMEM(m) (((m) == PETSC_MEMTYPE_NVSHMEM) ? PETSC_TRUE : PETSC_FALSE)
171: #if defined(__cplusplus)
172: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
173: #pragma GCC diagnostic push
174: #pragma GCC diagnostic ignored "-Wtautological-compare"
175: #endif
176: static_assert(PetscMemTypeHost(PETSC_MEMTYPE_HOST), "");
177: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_DEVICE), "");
178: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_CUDA), "");
179: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_HIP), "");
180: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_SYCL), "");
181: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_NVSHMEM), "");
183: static_assert(!PetscMemTypeDevice(PETSC_MEMTYPE_HOST), "");
184: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_DEVICE), "");
185: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_CUDA), "");
186: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_HIP), "");
187: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_SYCL), "");
188: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_NVSHMEM), "");
190: static_assert(PetscMemTypeCUDA(PETSC_MEMTYPE_CUDA), "");
191: static_assert(PetscMemTypeCUDA(PETSC_MEMTYPE_NVSHMEM), "");
192: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
193: #pragma GCC diagnostic pop
194: #endif
195: #endif // __cplusplus
197: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscMemTypeToString(PetscMemType mtype)
198: {
199: #ifdef __cplusplus
200: static_assert(PETSC_MEMTYPE_CUDA == PETSC_MEMTYPE_DEVICE, "");
201: #endif
202: #define PETSC_CASE_NAME(v) \
203: case v: \
204: return PetscStringize(v)
206: switch (mtype) {
207: PETSC_CASE_NAME(PETSC_MEMTYPE_HOST);
208: /* PETSC_CASE_NAME(PETSC_MEMTYPE_DEVICE); same as PETSC_MEMTYPE_CUDA */
209: PETSC_CASE_NAME(PETSC_MEMTYPE_CUDA);
210: PETSC_CASE_NAME(PETSC_MEMTYPE_NVSHMEM);
211: PETSC_CASE_NAME(PETSC_MEMTYPE_HIP);
212: PETSC_CASE_NAME(PETSC_MEMTYPE_SYCL);
213: }
214: PetscUnreachable();
215: return "invalid";
216: #undef PETSC_CASE_NAME
217: }
219: #define PETSC_OFFLOAD_VECKOKKOS_DEPRECATED PETSC_OFFLOAD_VECKOKKOS PETSC_DEPRECATED_ENUM(3, 17, 0, "PETSC_OFFLOAD_KOKKOS", )
221: /*E
222: PetscOffloadMask - indicates which memory (CPU, GPU, or none) contains valid data
224: Values:
225: + `PETSC_OFFLOAD_UNALLOCATED` - no memory contains valid matrix entries; NEVER used for vectors
226: . `PETSC_OFFLOAD_GPU` - GPU has valid vector/matrix entries
227: . `PETSC_OFFLOAD_CPU` - CPU has valid vector/matrix entries
228: . `PETSC_OFFLOAD_BOTH` - Both GPU and CPU have valid vector/matrix entries and they match
229: - `PETSC_OFFLOAD_KOKKOS` - Reserved for Kokkos matrix and vector. It means the offload is managed by Kokkos, thus this flag itself cannot tell you where the valid data is.
231: Level: developer
233: Developer Note:
234: This enum uses a function (`PetscOffloadMaskToString()`) to convert to string representation so
235: cannot be used in `PetscOptionsEnum()`.
237: .seealso: `PetscOffloadMaskToString()`, `PetscOffloadMaskToMemType()`, `PetscOffloadMaskToDeviceCopyMode()`
238: E*/
239: typedef enum {
240: PETSC_OFFLOAD_UNALLOCATED = 0, /* 0x0 */
241: PETSC_OFFLOAD_CPU = 1, /* 0x1 */
242: PETSC_OFFLOAD_GPU = 2, /* 0x2 */
243: PETSC_OFFLOAD_BOTH = 3, /* 0x3 */
244: PETSC_OFFLOAD_VECKOKKOS_DEPRECATED = 256, /* 0x100 */
245: PETSC_OFFLOAD_KOKKOS = 256 /* 0x100 */
246: } PetscOffloadMask;
248: /*MC
249: PetscOffloadUnallocated - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that no memory has been allocated yet
251: Synopsis:
252: #include <petscdevicetypes.h>
253: PetscBool PetscOffloadUnallocated(PetscOffloadMask m)
255: Not Collective; No Fortran Support
257: Input Parameter:
258: . m - the `PetscOffloadMask` value
260: Level: developer
262: .seealso: `PetscOffloadMask`, `PetscOffloadHost()`, `PetscOffloadDevice()`, `PetscOffloadBoth()`
263: M*/
264: #define PetscOffloadUnallocated(m) ((m) == PETSC_OFFLOAD_UNALLOCATED)
266: /*MC
267: PetscOffloadHost - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that the host (CPU) memory holds valid data
269: Synopsis:
270: #include <petscdevicetypes.h>
271: PetscBool PetscOffloadHost(PetscOffloadMask m)
273: Not Collective; No Fortran Support
275: Input Parameter:
276: . m - the `PetscOffloadMask` value
278: Level: developer
280: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadDevice()`, `PetscOffloadBoth()`
281: M*/
282: #define PetscOffloadHost(m) (((m) & PETSC_OFFLOAD_CPU) == PETSC_OFFLOAD_CPU)
284: /*MC
285: PetscOffloadDevice - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that device (GPU) memory holds valid data
287: Synopsis:
288: #include <petscdevicetypes.h>
289: PetscBool PetscOffloadDevice(PetscOffloadMask m)
291: Not Collective; No Fortran Support
293: Input Parameter:
294: . m - the `PetscOffloadMask` value
296: Level: developer
298: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadHost()`, `PetscOffloadBoth()`
299: M*/
300: #define PetscOffloadDevice(m) (((m) & PETSC_OFFLOAD_GPU) == PETSC_OFFLOAD_GPU)
302: /*MC
303: PetscOffloadBoth - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that both host and device memory hold matching valid data
305: Synopsis:
306: #include <petscdevicetypes.h>
307: PetscBool PetscOffloadBoth(PetscOffloadMask m)
309: Not Collective; No Fortran Support
311: Input Parameter:
312: . m - the `PetscOffloadMask` value
314: Level: developer
316: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadHost()`, `PetscOffloadDevice()`
317: M*/
318: #define PetscOffloadBoth(m) ((m) == PETSC_OFFLOAD_BOTH)
320: #if defined(__cplusplus)
321: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
322: #pragma GCC diagnostic push
323: #pragma GCC diagnostic ignored "-Wtautological-compare"
324: #endif
325: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_UNALLOCATED), "");
326: static_assert(PetscOffloadHost(PETSC_OFFLOAD_BOTH), "");
327: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_GPU), "");
328: static_assert(PetscOffloadHost(PETSC_OFFLOAD_BOTH), "");
329: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_KOKKOS), "");
331: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_UNALLOCATED), "");
332: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_CPU), "");
333: static_assert(PetscOffloadDevice(PETSC_OFFLOAD_GPU), "");
334: static_assert(PetscOffloadDevice(PETSC_OFFLOAD_BOTH), "");
335: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_KOKKOS), "");
337: static_assert(PetscOffloadBoth(PETSC_OFFLOAD_BOTH), "");
338: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_CPU), "");
339: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_GPU), "");
340: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_GPU), "");
341: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_KOKKOS), "");
342: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
343: #pragma GCC diagnostic pop
344: #endif
345: #endif // __cplusplus
347: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscOffloadMaskToString(PetscOffloadMask mask)
348: {
349: #define PETSC_CASE_RETURN(v) \
350: case v: \
351: return PetscStringize(v)
353: switch (mask) {
354: PETSC_CASE_RETURN(PETSC_OFFLOAD_UNALLOCATED);
355: PETSC_CASE_RETURN(PETSC_OFFLOAD_CPU);
356: PETSC_CASE_RETURN(PETSC_OFFLOAD_GPU);
357: PETSC_CASE_RETURN(PETSC_OFFLOAD_BOTH);
358: PETSC_CASE_RETURN(PETSC_OFFLOAD_KOKKOS);
359: }
360: PetscUnreachable();
361: return "invalid";
362: #undef PETSC_CASE_RETURN
363: }
365: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 PetscMemType PetscOffloadMaskToMemType(PetscOffloadMask mask)
366: {
367: switch (mask) {
368: case PETSC_OFFLOAD_UNALLOCATED:
369: case PETSC_OFFLOAD_CPU:
370: return PETSC_MEMTYPE_HOST;
371: case PETSC_OFFLOAD_GPU:
372: case PETSC_OFFLOAD_BOTH:
373: return PETSC_MEMTYPE_DEVICE;
374: case PETSC_OFFLOAD_KOKKOS:
375: return PETSC_MEMTYPE_KOKKOS;
376: }
377: PetscUnreachable();
378: return PETSC_MEMTYPE_HOST;
379: }
381: /*E
382: PetscDeviceInitType - Initialization strategy for `PetscDevice`
384: Values:
385: + `PETSC_DEVICE_INIT_NONE` - PetscDevice is never initialized
386: . `PETSC_DEVICE_INIT_LAZY` - PetscDevice is initialized on demand
387: - `PETSC_DEVICE_INIT_EAGER` - PetscDevice is initialized as soon as possible
389: Level: beginner
391: Note:
392: `PETSC_DEVICE_INIT_NONE` implies that any initialization of `PetscDevice` is disallowed and
393: doing so results in an error. Useful to ensure that no accelerator is used in a program.
395: .seealso: `PetscDevice`, `PetscDeviceType`, `PetscDeviceInitialize()`,
396: `PetscDeviceInitialized()`, `PetscDeviceCreate()`
397: E*/
398: typedef enum {
399: PETSC_DEVICE_INIT_NONE,
400: PETSC_DEVICE_INIT_LAZY,
401: PETSC_DEVICE_INIT_EAGER
402: } PetscDeviceInitType;
403: PETSC_EXTERN const char *const PetscDeviceInitTypes[];
405: /*E
406: PetscDeviceType - Kind of accelerator device backend
408: Values:
409: + `PETSC_DEVICE_HOST` - Host, no accelerator backend found
410: . `PETSC_DEVICE_CUDA` - CUDA enabled GPU
411: . `PETSC_DEVICE_HIP` - ROCM/HIP enabled GPU
412: . `PETSC_DEVICE_SYCL` - SYCL enabled device
413: - `PETSC_DEVICE_MAX` - Always 1 greater than the largest valid `PetscDeviceType`, invalid type, do not use
415: Level: beginner
417: Note:
418: One can also use the `PETSC_DEVICE_DEFAULT()` routine to get the current default `PetscDeviceType`.
420: .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceCreate()`, `PETSC_DEVICE_DEFAULT()`
421: E*/
422: typedef enum {
423: PETSC_DEVICE_HOST,
424: PETSC_DEVICE_CUDA,
425: PETSC_DEVICE_HIP,
426: PETSC_DEVICE_SYCL,
427: PETSC_DEVICE_MAX
428: } PetscDeviceType;
429: PETSC_EXTERN const char *const PetscDeviceTypes[];
431: /*E
432: PetscDeviceAttribute - Attribute detailing a property or feature of a `PetscDevice`
434: Values:
435: + `PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK` - The maximum amount of shared memory per block in a device kernel
436: - `PETSC_DEVICE_ATTR_MAX` - Invalid attribute, do not use
438: Level: beginner
440: .seealso: `PetscDevice`, `PetscDeviceGetAttribute()`
441: E*/
442: typedef enum {
443: PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK,
444: PETSC_DEVICE_ATTR_MAX
445: } PetscDeviceAttribute;
446: PETSC_EXTERN const char *const PetscDeviceAttributes[];
448: /*S
449: PetscDevice - Object to manage an accelerator "device" (usually a GPU)
451: Level: beginner
453: Note:
454: This object is used to house configuration and state of a device, but does not offer any
455: ability to interact with or drive device computation. This functionality is facilitated
456: instead by the `PetscDeviceContext` object.
458: .seealso: `PetscDeviceType`, `PetscDeviceInitType`, `PetscDeviceCreate()`,
459: `PetscDeviceConfigure()`, `PetscDeviceDestroy()`, `PetscDeviceContext`,
460: `PetscDeviceContextSetDevice()`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetAttribute()`
461: S*/
462: typedef struct _n_PetscDevice *PetscDevice;
464: /*E
465: PetscStreamType - indicates how a stream implementation will interact
466: with other streams and if it blocks the host.
468: Values:
469: + `PETSC_STREAM_DEFAULT` - Same as the default stream in CUDA or HIP. Streams of this type may or may not synchronize implicitly with other streams. It does not block the host.
470: . `PETSC_STREAM_NONBLOCKING` - Same as the nonblocking stream in CUDA or HIP. Streams of this type is truly asynchronous, and is blocked by nothing. It does not block the host. In CUDA, it is created with cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking).
471: . `PETSC_STREAM_DEFAULT_WITH_BARRIER` - Same as the default stream in CUDA or HIP. PETSc async functions using this kind of stream will end with a stream synchronization. Stream of this type may or may not synchronize implicitly with other streams.
472: . `PETSC_STREAM_NONBLOCKING_WITH_BARRIER` - Same as the nonblocking stream in CUDA or HIP. PETSc async functions using this kind of stream will end with a stream synchronization. Streams of this type are truly asynchronous and are blocked by nothing.
473: - `PETSC_STREAM_MAX` - Always 1 greater than the largest `PetscStreamType`, do not use
475: Level: intermediate
477: Note:
478: The default stream, also known as the NULL stream or stream 0, can have two different behaviors: legacy behavior and per-thread behavior.
479: The behavior is determined at compile time. By default, the legacy default stream is used.
480: The legacy default stream implicitly synchronizes with per-thread default streams.
481: The per-thread default stream, like nonblocking streams, does not synchronizes with other per-thread streams, but synchronize with the default stream.
482: The per-thread default stream may be useful for running kernels launched from different threads concurrently on the same GPU when the Multi-Process Service is not available.
483: To use the per-thread default stream, one can enable it by using the nvcc option "--default-stream per-thread" or the hipcc option "-fgpu-default-stream=per-thread", depending on the backend used.
485: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextGetStreamType()`
486: E*/
487: typedef enum {
488: PETSC_STREAM_DEFAULT,
489: PETSC_STREAM_NONBLOCKING,
490: PETSC_STREAM_DEFAULT_WITH_BARRIER,
491: PETSC_STREAM_NONBLOCKING_WITH_BARRIER,
492: PETSC_STREAM_MAX
493: } PetscStreamType;
494: PETSC_EXTERN const char *const PetscStreamTypes[];
496: /*E
497: PetscDeviceContextJoinMode - Describes the type of join operation to perform in
498: `PetscDeviceContextJoin()`
500: Values:
501: + `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` - Destroy all incoming sub-contexts after join.
502: . `PETSC_DEVICE_CONTEXT_JOIN_SYNC` - Synchronize incoming sub-contexts after join.
503: - `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` - Do not synchronize incoming sub-contexts after join.
505: Level: beginner
507: .seealso: `PetscDeviceContext`, `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`
508: E*/
509: typedef enum {
510: PETSC_DEVICE_CONTEXT_JOIN_DESTROY,
511: PETSC_DEVICE_CONTEXT_JOIN_SYNC,
512: PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
513: } PetscDeviceContextJoinMode;
514: PETSC_EXTERN const char *const PetscDeviceContextJoinModes[];
516: /*S
517: PetscDeviceContext - Container to manage stream dependencies and the various solver handles
518: for asynchronous device compute.
520: Level: beginner
522: .seealso: `PetscDevice`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
523: `PetscDeviceContextDestroy()`, `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`
524: S*/
525: typedef struct _p_PetscDeviceContext *PetscDeviceContext;
527: /*E
528: PetscDeviceCopyMode - Describes the copy direction of a device-aware `memcpy`
530: Values:
531: + `PETSC_DEVICE_COPY_HTOH` - Copy from host memory to host memory
532: . `PETSC_DEVICE_COPY_DTOH` - Copy from device memory to host memory
533: . `PETSC_DEVICE_COPY_HTOD` - Copy from host memory to device memory
534: . `PETSC_DEVICE_COPY_DTOD` - Copy from device memory to device memory
535: - `PETSC_DEVICE_COPY_AUTO` - Infer the copy direction from the pointers
537: Level: beginner
539: .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMemcpy()`
540: E*/
541: typedef enum {
542: PETSC_DEVICE_COPY_HTOH,
543: PETSC_DEVICE_COPY_DTOH,
544: PETSC_DEVICE_COPY_HTOD,
545: PETSC_DEVICE_COPY_DTOD,
546: PETSC_DEVICE_COPY_AUTO,
547: } PetscDeviceCopyMode;
548: PETSC_EXTERN const char *const PetscDeviceCopyModes[];
550: PETSC_NODISCARD static inline PetscDeviceCopyMode PetscOffloadMaskToDeviceCopyMode(PetscOffloadMask dest, PetscOffloadMask src)
551: {
552: PetscDeviceCopyMode mode;
554: PetscFunctionBegin;
555: PetscAssertAbort(dest != PETSC_OFFLOAD_UNALLOCATED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot copy to unallocated");
556: PetscAssertAbort(src != PETSC_OFFLOAD_UNALLOCATED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot copy from unallocated");
558: if (PetscOffloadDevice(dest)) {
559: mode = PetscOffloadHost(src) ? PETSC_DEVICE_COPY_HTOD : PETSC_DEVICE_COPY_DTOD;
560: } else {
561: mode = PetscOffloadHost(src) ? PETSC_DEVICE_COPY_HTOH : PETSC_DEVICE_COPY_DTOH;
562: }
563: PetscFunctionReturn(mode);
564: }
566: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 PetscDeviceCopyMode PetscMemTypeToDeviceCopyMode(PetscMemType dest, PetscMemType src)
567: {
568: if (PetscMemTypeHost(dest)) {
569: return PetscMemTypeHost(src) ? PETSC_DEVICE_COPY_HTOH : PETSC_DEVICE_COPY_DTOH;
570: } else {
571: return PetscMemTypeDevice(src) ? PETSC_DEVICE_COPY_DTOD : PETSC_DEVICE_COPY_HTOD;
572: }
573: }
575: /*E
576: PetscMemoryAccessMode - Describes the intended usage of a memory region
578: Values:
579: + `PETSC_MEMORY_ACCESS_READ` - Read only
580: . `PETSC_MEMORY_ACCESS_WRITE` - Write only
581: - `PETSC_MEMORY_ACCESS_READ_WRITE` - Read and write
583: Level: beginner
585: Notes:
586: This `enum` is a bitmask with the following encoding (assuming 2 bit)\:
588: .vb
589: PETSC_MEMORY_ACCESS_READ = 0b01
590: PETSC_MEMORY_ACCESS_WRITE = 0b10
591: PETSC_MEMORY_ACCESS_READ_WRITE = 0b11
593: // consequently
594: PETSC_MEMORY_ACCESS_READ | PETSC_MEMORY_ACCESS_WRITE = PETSC_MEMORY_ACCESS_READ_WRITE
595: .ve
597: The following convenience macros are also provided\:
599: + `PetscMemoryAccessRead(mode)` - `true` if `mode` is any kind of read, `false` otherwise
600: - `PetscMemoryAccessWrite(mode)` - `true` if `mode` is any kind of write, `false` otherwise
602: Developer Note:
603: This enum uses a function (`PetscMemoryAccessModeToString()`) to convert values to string
604: representation, so cannot be used in `PetscOptionsEnum()`.
606: .seealso: `PetscMemoryAccessModeToString()`, `PetscDevice`, `PetscDeviceContext`
607: E*/
608: typedef enum {
609: PETSC_MEMORY_ACCESS_READ = 1, /* 01 */
610: PETSC_MEMORY_ACCESS_WRITE = 2, /* 10 */
611: PETSC_MEMORY_ACCESS_READ_WRITE = 3 /* 11 */
612: } PetscMemoryAccessMode;
614: /*MC
615: PetscMemoryAccessRead - Returns `PETSC_TRUE` if a given `PetscMemoryAccessMode` includes read access
617: Synopsis:
618: #include <petscdevicetypes.h>
619: PetscBool PetscMemoryAccessRead(PetscMemoryAccessMode m)
621: Not Collective; No Fortran Support
623: Input Parameter:
624: . m - the `PetscMemoryAccessMode` value
626: Level: developer
628: .seealso: `PetscMemoryAccessMode`, `PetscMemoryAccessWrite()`
629: M*/
630: #define PetscMemoryAccessRead(m) (((m) & PETSC_MEMORY_ACCESS_READ) == PETSC_MEMORY_ACCESS_READ)
632: /*MC
633: PetscMemoryAccessWrite - Returns `PETSC_TRUE` if a given `PetscMemoryAccessMode` includes write access
635: Synopsis:
636: #include <petscdevicetypes.h>
637: PetscBool PetscMemoryAccessWrite(PetscMemoryAccessMode m)
639: Not Collective; No Fortran Support
641: Input Parameter:
642: . m - the `PetscMemoryAccessMode` value
644: Level: developer
646: .seealso: `PetscMemoryAccessMode`, `PetscMemoryAccessRead()`
647: M*/
648: #define PetscMemoryAccessWrite(m) (((m) & PETSC_MEMORY_ACCESS_WRITE) == PETSC_MEMORY_ACCESS_WRITE)
650: #if defined(__cplusplus)
651: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
652: #pragma GCC diagnostic push
653: #pragma GCC diagnostic ignored "-Wtautological-compare"
654: #endif
655: static_assert(PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_READ), "");
656: static_assert(PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_READ_WRITE), "");
657: static_assert(!PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_WRITE), "");
658: static_assert(PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_WRITE), "");
659: static_assert(PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_READ_WRITE), "");
660: static_assert(!PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_READ), "");
661: static_assert((PETSC_MEMORY_ACCESS_READ | PETSC_MEMORY_ACCESS_WRITE) == PETSC_MEMORY_ACCESS_READ_WRITE, "");
662: #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
663: #pragma GCC diagnostic pop
664: #endif
665: #endif
667: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscMemoryAccessModeToString(PetscMemoryAccessMode mode)
668: {
669: #define PETSC_CASE_RETURN(v) \
670: case v: \
671: return PetscStringize(v)
673: switch (mode) {
674: PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_READ);
675: PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_WRITE);
676: PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_READ_WRITE);
677: }
678: PetscUnreachable();
679: return "invalid";
680: #undef PETSC_CASE_RETURN
681: }
683: #undef PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING