Actual source code: petscdevicetypes.h

  1: #pragma once

  3: #include <petscsys.h>

  5: // Some overzealous older gcc versions warn that the comparisons below are always true. Neat
  6: // that it can detect this, but the tautology *is* the point of the static_assert()!
  7: #if defined(__GNUC__) && __GNUC__ >= 6 && !PetscDefined(HAVE_WINDOWS_COMPILERS)
  8:   #define PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING 1
  9: #else
 10:   #define PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING 0
 11: #endif

 13: /* SUBMANSEC = Sys */

 15: /*E
 16:   PetscMemType - Memory type of a pointer

 18:   Level: intermediate

 20:   Note:
 21:   `PETSC_MEMTYPE_KOKKOS` depends on the Kokkos backend configuration

 23:   Developer Notes:
 24:   This enum uses a function (`PetscMemTypeToString()`) to convert to string representation so
 25:   cannot be used in `PetscOptionsEnum()`.

 27:   Encoding of the bitmask in binary\: xxxxyyyz
 28: .vb
 29:  z = 0                - Host memory
 30:  z = 1                - Device memory
 31:  yyy = 000            - CUDA-related memory
 32:  yyy = 001            - HIP-related memory
 33:  yyy = 010            - SYCL-related memory
 34:  xxxxyyy1 = 0000,0001 - CUDA memory
 35:  xxxxyyy1 = 0001,0001 - CUDA NVSHMEM memory
 36:  xxxxyyy1 = 0000,0011 - HIP memory
 37:  xxxxyyy1 = 0000,0101 - SYCL memory
 38: .ve

 40:   Other types of memory, e.g., CUDA managed memory, can be added when needed.

 42: .seealso: `PetscMemTypeToString()`, `VecGetArrayAndMemType()`,
 43: `PetscSFBcastWithMemTypeBegin()`, `PetscSFReduceWithMemTypeBegin()`
 44: E*/
 45: typedef enum {
 46:   PETSC_MEMTYPE_HOST    = 0,
 47:   PETSC_MEMTYPE_DEVICE  = 1,  /* 0x01 */
 48:   PETSC_MEMTYPE_CUDA    = 1,  /* 0x01 */
 49:   PETSC_MEMTYPE_NVSHMEM = 17, /* 0x11 */
 50:   PETSC_MEMTYPE_HIP     = 3,  /* 0x03 */
 51:   PETSC_MEMTYPE_SYCL    = 5   /* 0x05 */
 52: } PetscMemType;
 53: #if PetscDefined(HAVE_CUDA)
 54:   #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_CUDA
 55: #elif PetscDefined(HAVE_HIP)
 56:   #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_HIP
 57: #elif PetscDefined(HAVE_SYCL)
 58:   #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_SYCL
 59: #else
 60:   #define PETSC_MEMTYPE_KOKKOS PETSC_MEMTYPE_HOST
 61: #endif

 63: /*MC
 64:   PetscMemTypeHost - Returns `PETSC_TRUE` if a given `PetscMemType` refers to host (CPU) memory

 66:   Synopsis:
 67: #include <petscdevicetypes.h>
 68:   PetscBool PetscMemTypeHost(PetscMemType m)

 70:   Not Collective; No Fortran Support

 72:   Input Parameter:
 73: . m - the `PetscMemType` value

 75:   Level: intermediate

 77: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
 78: M*/
 79: #define PetscMemTypeHost(m) ((((m) & 0x1) == PETSC_MEMTYPE_HOST) ? PETSC_TRUE : PETSC_FALSE)

 81: /*MC
 82:   PetscMemTypeDevice - Returns `PETSC_TRUE` if a given `PetscMemType` refers to any kind of device (GPU) memory

 84:   Synopsis:
 85: #include <petscdevicetypes.h>
 86:   PetscBool PetscMemTypeDevice(PetscMemType m)

 88:   Not Collective; No Fortran Support

 90:   Input Parameter:
 91: . m - the `PetscMemType` value

 93:   Level: intermediate

 95: .seealso: `PetscMemType`, `PetscMemTypeHost()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
 96: M*/
 97: #define PetscMemTypeDevice(m) ((((m) & 0x1) == PETSC_MEMTYPE_DEVICE) ? PETSC_TRUE : PETSC_FALSE)

 99: /*MC
100:   PetscMemTypeCUDA - Returns `PETSC_TRUE` if a given `PetscMemType` refers to CUDA device memory (including CUDA NVSHMEM memory)

102:   Synopsis:
103: #include <petscdevicetypes.h>
104:   PetscBool PetscMemTypeCUDA(PetscMemType m)

106:   Not Collective; No Fortran Support

108:   Input Parameter:
109: . m - the `PetscMemType` value

111:   Level: intermediate

113: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
114: M*/
115: #define PetscMemTypeCUDA(m) ((((m) & 0xF) == PETSC_MEMTYPE_CUDA) ? PETSC_TRUE : PETSC_FALSE)

117: /*MC
118:   PetscMemTypeHIP - Returns `PETSC_TRUE` if a given `PetscMemType` refers to HIP device memory

120:   Synopsis:
121: #include <petscdevicetypes.h>
122:   PetscBool PetscMemTypeHIP(PetscMemType m)

124:   Not Collective; No Fortran Support

126:   Input Parameter:
127: . m - the `PetscMemType` value

129:   Level: intermediate

131: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeSYCL()`, `PetscMemTypeNVSHMEM()`
132: M*/
133: #define PetscMemTypeHIP(m) ((((m) & 0xF) == PETSC_MEMTYPE_HIP) ? PETSC_TRUE : PETSC_FALSE)

135: /*MC
136:   PetscMemTypeSYCL - Returns `PETSC_TRUE` if a given `PetscMemType` refers to SYCL device memory

138:   Synopsis:
139: #include <petscdevicetypes.h>
140:   PetscBool PetscMemTypeSYCL(PetscMemType m)

142:   Not Collective; No Fortran Support

144:   Input Parameter:
145: . m - the `PetscMemType` value

147:   Level: intermediate

149: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeNVSHMEM()`
150: M*/
151: #define PetscMemTypeSYCL(m) ((((m) & 0xF) == PETSC_MEMTYPE_SYCL) ? PETSC_TRUE : PETSC_FALSE)

153: /*MC
154:   PetscMemTypeNVSHMEM - Returns `PETSC_TRUE` if a given `PetscMemType` refers to NVSHMEM memory

156:   Synopsis:
157: #include <petscdevicetypes.h>
158:   PetscBool PetscMemTypeNVSHMEM(PetscMemType m)

160:   Not Collective; No Fortran Support

162:   Input Parameter:
163: . m - the `PetscMemType` value

165:   Level: intermediate

167: .seealso: `PetscMemType`, `PetscMemTypeDevice()`, `PetscMemTypeCUDA()`, `PetscMemTypeHIP()`, `PetscMemTypeSYCL()`
168: M*/
169: #define PetscMemTypeNVSHMEM(m) (((m) == PETSC_MEMTYPE_NVSHMEM) ? PETSC_TRUE : PETSC_FALSE)

171: #if defined(__cplusplus)
172:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
173:     #pragma GCC diagnostic push
174:     #pragma GCC diagnostic ignored "-Wtautological-compare"
175:   #endif
176: static_assert(PetscMemTypeHost(PETSC_MEMTYPE_HOST), "");
177: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_DEVICE), "");
178: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_CUDA), "");
179: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_HIP), "");
180: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_SYCL), "");
181: static_assert(!PetscMemTypeHost(PETSC_MEMTYPE_NVSHMEM), "");

183: static_assert(!PetscMemTypeDevice(PETSC_MEMTYPE_HOST), "");
184: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_DEVICE), "");
185: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_CUDA), "");
186: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_HIP), "");
187: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_SYCL), "");
188: static_assert(PetscMemTypeDevice(PETSC_MEMTYPE_NVSHMEM), "");

190: static_assert(PetscMemTypeCUDA(PETSC_MEMTYPE_CUDA), "");
191: static_assert(PetscMemTypeCUDA(PETSC_MEMTYPE_NVSHMEM), "");
192:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
193:     #pragma GCC diagnostic pop
194:   #endif
195: #endif // __cplusplus

197: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscMemTypeToString(PetscMemType mtype)
198: {
199: #ifdef __cplusplus
200:   static_assert(PETSC_MEMTYPE_CUDA == PETSC_MEMTYPE_DEVICE, "");
201: #endif
202: #define PETSC_CASE_NAME(v) \
203:   case v: \
204:     return PetscStringize(v)

206:   switch (mtype) {
207:     PETSC_CASE_NAME(PETSC_MEMTYPE_HOST);
208:     /* PETSC_CASE_NAME(PETSC_MEMTYPE_DEVICE); same as PETSC_MEMTYPE_CUDA */
209:     PETSC_CASE_NAME(PETSC_MEMTYPE_CUDA);
210:     PETSC_CASE_NAME(PETSC_MEMTYPE_NVSHMEM);
211:     PETSC_CASE_NAME(PETSC_MEMTYPE_HIP);
212:     PETSC_CASE_NAME(PETSC_MEMTYPE_SYCL);
213:   }
214:   PetscUnreachable();
215:   return "invalid";
216: #undef PETSC_CASE_NAME
217: }

219: #define PETSC_OFFLOAD_VECKOKKOS_DEPRECATED PETSC_OFFLOAD_VECKOKKOS PETSC_DEPRECATED_ENUM(3, 17, 0, "PETSC_OFFLOAD_KOKKOS", )

221: /*E
222:   PetscOffloadMask - indicates which memory (CPU, GPU, or none) contains valid data

224:   Values:
225: + `PETSC_OFFLOAD_UNALLOCATED` - no memory contains valid matrix entries; NEVER used for vectors
226: . `PETSC_OFFLOAD_GPU`         - GPU has valid vector/matrix entries
227: . `PETSC_OFFLOAD_CPU`         - CPU has valid vector/matrix entries
228: . `PETSC_OFFLOAD_BOTH`        - Both GPU and CPU have valid vector/matrix entries and they match
229: - `PETSC_OFFLOAD_KOKKOS`      - Reserved for Kokkos matrix and vector. It means the offload is managed by Kokkos, thus this flag itself cannot tell you where the valid data is.

231:   Level: developer

233:   Developer Note:
234:   This enum uses a function (`PetscOffloadMaskToString()`) to convert to string representation so
235:   cannot be used in `PetscOptionsEnum()`.

237: .seealso: `PetscOffloadMaskToString()`, `PetscOffloadMaskToMemType()`, `PetscOffloadMaskToDeviceCopyMode()`
238: E*/
239: typedef enum {
240:   PETSC_OFFLOAD_UNALLOCATED          = 0,   /* 0x0 */
241:   PETSC_OFFLOAD_CPU                  = 1,   /* 0x1 */
242:   PETSC_OFFLOAD_GPU                  = 2,   /* 0x2 */
243:   PETSC_OFFLOAD_BOTH                 = 3,   /* 0x3 */
244:   PETSC_OFFLOAD_VECKOKKOS_DEPRECATED = 256, /* 0x100 */
245:   PETSC_OFFLOAD_KOKKOS               = 256  /* 0x100 */
246: } PetscOffloadMask;

248: /*MC
249:   PetscOffloadUnallocated - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that no memory has been allocated yet

251:   Synopsis:
252: #include <petscdevicetypes.h>
253:   PetscBool PetscOffloadUnallocated(PetscOffloadMask m)

255:   Not Collective; No Fortran Support

257:   Input Parameter:
258: . m - the `PetscOffloadMask` value

260:   Level: developer

262: .seealso: `PetscOffloadMask`, `PetscOffloadHost()`, `PetscOffloadDevice()`, `PetscOffloadBoth()`
263: M*/
264: #define PetscOffloadUnallocated(m) ((m) == PETSC_OFFLOAD_UNALLOCATED)

266: /*MC
267:   PetscOffloadHost - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that the host (CPU) memory holds valid data

269:   Synopsis:
270: #include <petscdevicetypes.h>
271:   PetscBool PetscOffloadHost(PetscOffloadMask m)

273:   Not Collective; No Fortran Support

275:   Input Parameter:
276: . m - the `PetscOffloadMask` value

278:   Level: developer

280: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadDevice()`, `PetscOffloadBoth()`
281: M*/
282: #define PetscOffloadHost(m) (((m) & PETSC_OFFLOAD_CPU) == PETSC_OFFLOAD_CPU)

284: /*MC
285:   PetscOffloadDevice - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that device (GPU) memory holds valid data

287:   Synopsis:
288: #include <petscdevicetypes.h>
289:   PetscBool PetscOffloadDevice(PetscOffloadMask m)

291:   Not Collective; No Fortran Support

293:   Input Parameter:
294: . m - the `PetscOffloadMask` value

296:   Level: developer

298: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadHost()`, `PetscOffloadBoth()`
299: M*/
300: #define PetscOffloadDevice(m) (((m) & PETSC_OFFLOAD_GPU) == PETSC_OFFLOAD_GPU)

302: /*MC
303:   PetscOffloadBoth - Returns `PETSC_TRUE` if a given `PetscOffloadMask` indicates that both host and device memory hold matching valid data

305:   Synopsis:
306: #include <petscdevicetypes.h>
307:   PetscBool PetscOffloadBoth(PetscOffloadMask m)

309:   Not Collective; No Fortran Support

311:   Input Parameter:
312: . m - the `PetscOffloadMask` value

314:   Level: developer

316: .seealso: `PetscOffloadMask`, `PetscOffloadUnallocated()`, `PetscOffloadHost()`, `PetscOffloadDevice()`
317: M*/
318: #define PetscOffloadBoth(m) ((m) == PETSC_OFFLOAD_BOTH)

320: #if defined(__cplusplus)
321:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
322:     #pragma GCC diagnostic push
323:     #pragma GCC diagnostic ignored "-Wtautological-compare"
324:   #endif
325: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_UNALLOCATED), "");
326: static_assert(PetscOffloadHost(PETSC_OFFLOAD_BOTH), "");
327: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_GPU), "");
328: static_assert(PetscOffloadHost(PETSC_OFFLOAD_BOTH), "");
329: static_assert(!PetscOffloadHost(PETSC_OFFLOAD_KOKKOS), "");

331: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_UNALLOCATED), "");
332: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_CPU), "");
333: static_assert(PetscOffloadDevice(PETSC_OFFLOAD_GPU), "");
334: static_assert(PetscOffloadDevice(PETSC_OFFLOAD_BOTH), "");
335: static_assert(!PetscOffloadDevice(PETSC_OFFLOAD_KOKKOS), "");

337: static_assert(PetscOffloadBoth(PETSC_OFFLOAD_BOTH), "");
338: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_CPU), "");
339: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_GPU), "");
340: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_GPU), "");
341: static_assert(!PetscOffloadBoth(PETSC_OFFLOAD_KOKKOS), "");
342:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
343:     #pragma GCC diagnostic pop
344:   #endif
345: #endif // __cplusplus

347: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscOffloadMaskToString(PetscOffloadMask mask)
348: {
349: #define PETSC_CASE_RETURN(v) \
350:   case v: \
351:     return PetscStringize(v)

353:   switch (mask) {
354:     PETSC_CASE_RETURN(PETSC_OFFLOAD_UNALLOCATED);
355:     PETSC_CASE_RETURN(PETSC_OFFLOAD_CPU);
356:     PETSC_CASE_RETURN(PETSC_OFFLOAD_GPU);
357:     PETSC_CASE_RETURN(PETSC_OFFLOAD_BOTH);
358:     PETSC_CASE_RETURN(PETSC_OFFLOAD_KOKKOS);
359:   }
360:   PetscUnreachable();
361:   return "invalid";
362: #undef PETSC_CASE_RETURN
363: }

365: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 PetscMemType PetscOffloadMaskToMemType(PetscOffloadMask mask)
366: {
367:   switch (mask) {
368:   case PETSC_OFFLOAD_UNALLOCATED:
369:   case PETSC_OFFLOAD_CPU:
370:     return PETSC_MEMTYPE_HOST;
371:   case PETSC_OFFLOAD_GPU:
372:   case PETSC_OFFLOAD_BOTH:
373:     return PETSC_MEMTYPE_DEVICE;
374:   case PETSC_OFFLOAD_KOKKOS:
375:     return PETSC_MEMTYPE_KOKKOS;
376:   }
377:   PetscUnreachable();
378:   return PETSC_MEMTYPE_HOST;
379: }

381: /*E
382:   PetscDeviceInitType - Initialization strategy for `PetscDevice`

384:   Values:
385: + `PETSC_DEVICE_INIT_NONE`  - PetscDevice is never initialized
386: . `PETSC_DEVICE_INIT_LAZY`  - PetscDevice is initialized on demand
387: - `PETSC_DEVICE_INIT_EAGER` - PetscDevice is initialized as soon as possible

389:   Level: beginner

391:   Note:
392:   `PETSC_DEVICE_INIT_NONE` implies that any initialization of `PetscDevice` is disallowed and
393:   doing so results in an error. Useful to ensure that no accelerator is used in a program.

395: .seealso: `PetscDevice`, `PetscDeviceType`, `PetscDeviceInitialize()`,
396: `PetscDeviceInitialized()`, `PetscDeviceCreate()`
397: E*/
398: typedef enum {
399:   PETSC_DEVICE_INIT_NONE,
400:   PETSC_DEVICE_INIT_LAZY,
401:   PETSC_DEVICE_INIT_EAGER
402: } PetscDeviceInitType;
403: PETSC_EXTERN const char *const PetscDeviceInitTypes[];

405: /*E
406:   PetscDeviceType - Kind of accelerator device backend

408:   Values:
409: + `PETSC_DEVICE_HOST` - Host, no accelerator backend found
410: . `PETSC_DEVICE_CUDA` - CUDA enabled GPU
411: . `PETSC_DEVICE_HIP`  - ROCM/HIP enabled GPU
412: . `PETSC_DEVICE_SYCL` - SYCL enabled device
413: - `PETSC_DEVICE_MAX`  - Always 1 greater than the largest valid `PetscDeviceType`, invalid type, do not use

415:   Level: beginner

417:   Note:
418:   One can also use the `PETSC_DEVICE_DEFAULT()` routine to get the current default `PetscDeviceType`.

420: .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceCreate()`, `PETSC_DEVICE_DEFAULT()`
421: E*/
422: typedef enum {
423:   PETSC_DEVICE_HOST,
424:   PETSC_DEVICE_CUDA,
425:   PETSC_DEVICE_HIP,
426:   PETSC_DEVICE_SYCL,
427:   PETSC_DEVICE_MAX
428: } PetscDeviceType;
429: PETSC_EXTERN const char *const PetscDeviceTypes[];

431: /*E
432:   PetscDeviceAttribute - Attribute detailing a property or feature of a `PetscDevice`

434:   Values:
435: + `PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK` - The maximum amount of shared memory per block in a device kernel
436: - `PETSC_DEVICE_ATTR_MAX`                         - Invalid attribute, do not use

438:   Level: beginner

440: .seealso: `PetscDevice`, `PetscDeviceGetAttribute()`
441: E*/
442: typedef enum {
443:   PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK,
444:   PETSC_DEVICE_ATTR_MAX
445: } PetscDeviceAttribute;
446: PETSC_EXTERN const char *const PetscDeviceAttributes[];

448: /*S
449:   PetscDevice - Object to manage an accelerator "device" (usually a GPU)

451:   Level: beginner

453:   Note:
454:   This object is used to house configuration and state of a device, but does not offer any
455:   ability to interact with or drive device computation. This functionality is facilitated
456:   instead by the `PetscDeviceContext` object.

458: .seealso: `PetscDeviceType`, `PetscDeviceInitType`, `PetscDeviceCreate()`,
459: `PetscDeviceConfigure()`, `PetscDeviceDestroy()`, `PetscDeviceContext`,
460: `PetscDeviceContextSetDevice()`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetAttribute()`
461: S*/
462: typedef struct _n_PetscDevice *PetscDevice;

464: /*E
465:   PetscStreamType - indicates how a stream implementation will interact
466:   with other streams and if it blocks the host.

468:   Values:
469: + `PETSC_STREAM_DEFAULT`                  - Same as the default stream in CUDA or HIP. Streams of this type may or may not synchronize implicitly with other streams. It does not block the host.
470: . `PETSC_STREAM_NONBLOCKING`              - Same as the nonblocking stream in CUDA or HIP. Streams of this type is truly asynchronous, and is blocked by nothing. It does not block the host. In CUDA, it is created with cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking).
471: . `PETSC_STREAM_DEFAULT_WITH_BARRIER`     - Same as the default stream in CUDA or HIP. PETSc async functions using this kind of stream will end with a stream synchronization. Stream of this type may or may not synchronize implicitly with other streams.
472: . `PETSC_STREAM_NONBLOCKING_WITH_BARRIER` - Same as the nonblocking stream in CUDA or HIP. PETSc async functions using this kind of stream will end with a stream synchronization. Streams of this type are truly asynchronous and are blocked by nothing.
473: - `PETSC_STREAM_MAX`                - Always 1 greater than the largest `PetscStreamType`, do not use

475:   Level: intermediate

477:   Note:
478:   The default stream, also known as the NULL stream or stream 0, can have two different behaviors: legacy behavior and per-thread behavior.
479:   The behavior is determined at compile time. By default, the legacy default stream is used.
480:   The legacy default stream implicitly synchronizes with per-thread default streams.
481:   The per-thread default stream, like nonblocking streams, does not synchronizes with other per-thread streams, but synchronize with the default stream.
482:   The per-thread default stream may be useful for running kernels launched from different threads concurrently on the same GPU when the Multi-Process Service is not available.
483:   To use the per-thread default stream, one can enable it by using the nvcc option "--default-stream per-thread" or the hipcc option "-fgpu-default-stream=per-thread", depending on the backend used.

485: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextGetStreamType()`
486: E*/
487: typedef enum {
488:   PETSC_STREAM_DEFAULT,
489:   PETSC_STREAM_NONBLOCKING,
490:   PETSC_STREAM_DEFAULT_WITH_BARRIER,
491:   PETSC_STREAM_NONBLOCKING_WITH_BARRIER,
492:   PETSC_STREAM_MAX
493: } PetscStreamType;
494: PETSC_EXTERN const char *const PetscStreamTypes[];

496: /*E
497:   PetscDeviceContextJoinMode - Describes the type of join operation to perform in
498:   `PetscDeviceContextJoin()`

500:   Values:
501: + `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` - Destroy all incoming sub-contexts after join.
502: . `PETSC_DEVICE_CONTEXT_JOIN_SYNC`    - Synchronize incoming sub-contexts after join.
503: - `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` - Do not synchronize incoming sub-contexts after join.

505:   Level: beginner

507: .seealso: `PetscDeviceContext`, `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`
508: E*/
509: typedef enum {
510:   PETSC_DEVICE_CONTEXT_JOIN_DESTROY,
511:   PETSC_DEVICE_CONTEXT_JOIN_SYNC,
512:   PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
513: } PetscDeviceContextJoinMode;
514: PETSC_EXTERN const char *const PetscDeviceContextJoinModes[];

516: /*S
517:   PetscDeviceContext - Container to manage stream dependencies and the various solver handles
518:   for asynchronous device compute.

520:   Level: beginner

522: .seealso: `PetscDevice`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
523: `PetscDeviceContextDestroy()`, `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`
524: S*/
525: typedef struct _p_PetscDeviceContext *PetscDeviceContext;

527: /*E
528:   PetscDeviceCopyMode - Describes the copy direction of a device-aware `memcpy`

530:   Values:
531: + `PETSC_DEVICE_COPY_HTOH` - Copy from host memory to host memory
532: . `PETSC_DEVICE_COPY_DTOH` - Copy from device memory to host memory
533: . `PETSC_DEVICE_COPY_HTOD` - Copy from host memory to device memory
534: . `PETSC_DEVICE_COPY_DTOD` - Copy from device memory to device memory
535: - `PETSC_DEVICE_COPY_AUTO` - Infer the copy direction from the pointers

537:   Level: beginner

539: .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMemcpy()`
540: E*/
541: typedef enum {
542:   PETSC_DEVICE_COPY_HTOH,
543:   PETSC_DEVICE_COPY_DTOH,
544:   PETSC_DEVICE_COPY_HTOD,
545:   PETSC_DEVICE_COPY_DTOD,
546:   PETSC_DEVICE_COPY_AUTO,
547: } PetscDeviceCopyMode;
548: PETSC_EXTERN const char *const PetscDeviceCopyModes[];

550: PETSC_NODISCARD static inline PetscDeviceCopyMode PetscOffloadMaskToDeviceCopyMode(PetscOffloadMask dest, PetscOffloadMask src)
551: {
552:   PetscDeviceCopyMode mode;

554:   PetscFunctionBegin;
555:   PetscAssertAbort(dest != PETSC_OFFLOAD_UNALLOCATED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot copy to unallocated");
556:   PetscAssertAbort(src != PETSC_OFFLOAD_UNALLOCATED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot copy from unallocated");

558:   if (PetscOffloadDevice(dest)) {
559:     mode = PetscOffloadHost(src) ? PETSC_DEVICE_COPY_HTOD : PETSC_DEVICE_COPY_DTOD;
560:   } else {
561:     mode = PetscOffloadHost(src) ? PETSC_DEVICE_COPY_HTOH : PETSC_DEVICE_COPY_DTOH;
562:   }
563:   PetscFunctionReturn(mode);
564: }

566: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 PetscDeviceCopyMode PetscMemTypeToDeviceCopyMode(PetscMemType dest, PetscMemType src)
567: {
568:   if (PetscMemTypeHost(dest)) {
569:     return PetscMemTypeHost(src) ? PETSC_DEVICE_COPY_HTOH : PETSC_DEVICE_COPY_DTOH;
570:   } else {
571:     return PetscMemTypeDevice(src) ? PETSC_DEVICE_COPY_DTOD : PETSC_DEVICE_COPY_HTOD;
572:   }
573: }

575: /*E
576:   PetscMemoryAccessMode - Describes the intended usage of a memory region

578:   Values:
579: + `PETSC_MEMORY_ACCESS_READ`       - Read only
580: . `PETSC_MEMORY_ACCESS_WRITE`      - Write only
581: - `PETSC_MEMORY_ACCESS_READ_WRITE` - Read and write

583:   Level: beginner

585:   Notes:
586:   This `enum` is a bitmask with the following encoding (assuming 2 bit)\:

588: .vb
589:   PETSC_MEMORY_ACCESS_READ       = 0b01
590:   PETSC_MEMORY_ACCESS_WRITE      = 0b10
591:   PETSC_MEMORY_ACCESS_READ_WRITE = 0b11

593:   // consequently
594:   PETSC_MEMORY_ACCESS_READ | PETSC_MEMORY_ACCESS_WRITE = PETSC_MEMORY_ACCESS_READ_WRITE
595: .ve

597:   The following convenience macros are also provided\:

599: + `PetscMemoryAccessRead(mode)` - `true` if `mode` is any kind of read, `false` otherwise
600: - `PetscMemoryAccessWrite(mode)` - `true` if `mode` is any kind of write, `false` otherwise

602:   Developer Note:
603:   This enum uses a function (`PetscMemoryAccessModeToString()`) to convert values to string
604:   representation, so cannot be used in `PetscOptionsEnum()`.

606: .seealso: `PetscMemoryAccessModeToString()`, `PetscDevice`, `PetscDeviceContext`
607: E*/
608: typedef enum {
609:   PETSC_MEMORY_ACCESS_READ       = 1, /* 01 */
610:   PETSC_MEMORY_ACCESS_WRITE      = 2, /* 10 */
611:   PETSC_MEMORY_ACCESS_READ_WRITE = 3  /* 11 */
612: } PetscMemoryAccessMode;

614: /*MC
615:   PetscMemoryAccessRead - Returns `PETSC_TRUE` if a given `PetscMemoryAccessMode` includes read access

617:   Synopsis:
618: #include <petscdevicetypes.h>
619:   PetscBool PetscMemoryAccessRead(PetscMemoryAccessMode m)

621:   Not Collective; No Fortran Support

623:   Input Parameter:
624: . m - the `PetscMemoryAccessMode` value

626:   Level: developer

628: .seealso: `PetscMemoryAccessMode`, `PetscMemoryAccessWrite()`
629: M*/
630: #define PetscMemoryAccessRead(m) (((m) & PETSC_MEMORY_ACCESS_READ) == PETSC_MEMORY_ACCESS_READ)

632: /*MC
633:   PetscMemoryAccessWrite - Returns `PETSC_TRUE` if a given `PetscMemoryAccessMode` includes write access

635:   Synopsis:
636: #include <petscdevicetypes.h>
637:   PetscBool PetscMemoryAccessWrite(PetscMemoryAccessMode m)

639:   Not Collective; No Fortran Support

641:   Input Parameter:
642: . m - the `PetscMemoryAccessMode` value

644:   Level: developer

646: .seealso: `PetscMemoryAccessMode`, `PetscMemoryAccessRead()`
647: M*/
648: #define PetscMemoryAccessWrite(m) (((m) & PETSC_MEMORY_ACCESS_WRITE) == PETSC_MEMORY_ACCESS_WRITE)

650: #if defined(__cplusplus)
651:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
652:     #pragma GCC diagnostic push
653:     #pragma GCC diagnostic ignored "-Wtautological-compare"
654:   #endif
655: static_assert(PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_READ), "");
656: static_assert(PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_READ_WRITE), "");
657: static_assert(!PetscMemoryAccessRead(PETSC_MEMORY_ACCESS_WRITE), "");
658: static_assert(PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_WRITE), "");
659: static_assert(PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_READ_WRITE), "");
660: static_assert(!PetscMemoryAccessWrite(PETSC_MEMORY_ACCESS_READ), "");
661: static_assert((PETSC_MEMORY_ACCESS_READ | PETSC_MEMORY_ACCESS_WRITE) == PETSC_MEMORY_ACCESS_READ_WRITE, "");
662:   #if PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING
663:     #pragma GCC diagnostic pop
664:   #endif
665: #endif

667: PETSC_NODISCARD static inline PETSC_CONSTEXPR_14 const char *PetscMemoryAccessModeToString(PetscMemoryAccessMode mode)
668: {
669: #define PETSC_CASE_RETURN(v) \
670:   case v: \
671:     return PetscStringize(v)

673:   switch (mode) {
674:     PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_READ);
675:     PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_WRITE);
676:     PETSC_CASE_RETURN(PETSC_MEMORY_ACCESS_READ_WRITE);
677:   }
678:   PetscUnreachable();
679:   return "invalid";
680: #undef PETSC_CASE_RETURN
681: }

683: #undef PETSC_SHOULD_SILENCE_GCC_TAUTOLOGICAL_COMPARE_WARNING