Actual source code: deviceimpl.h

  1: #pragma once

  3: #include <petscdevice.h>
  4: #include <petsc/private/petscimpl.h>

  6: #if defined(PETSC_HAVE_CUPM)
  7: PETSC_INTERN int PetscDeviceCUPMRuntimeArch; // The real CUDA/HIP arch the code is run with. For log view and error diagnosis
  8: #endif

 10: /* logging support */
 11: PETSC_INTERN PetscLogEvent CUBLAS_HANDLE_CREATE;
 12: PETSC_INTERN PetscLogEvent CUSOLVER_HANDLE_CREATE;
 13: PETSC_INTERN PetscLogEvent HIPSOLVER_HANDLE_CREATE;
 14: PETSC_INTERN PetscLogEvent HIPBLAS_HANDLE_CREATE;

 16: PETSC_INTERN PetscLogEvent DCONTEXT_Create;
 17: PETSC_INTERN PetscLogEvent DCONTEXT_Destroy;
 18: PETSC_INTERN PetscLogEvent DCONTEXT_ChangeStream;
 19: PETSC_INTERN PetscLogEvent DCONTEXT_SetDevice;
 20: PETSC_INTERN PetscLogEvent DCONTEXT_SetUp;
 21: PETSC_INTERN PetscLogEvent DCONTEXT_Duplicate;
 22: PETSC_INTERN PetscLogEvent DCONTEXT_QueryIdle;
 23: PETSC_INTERN PetscLogEvent DCONTEXT_WaitForCtx;
 24: PETSC_INTERN PetscLogEvent DCONTEXT_Fork;
 25: PETSC_INTERN PetscLogEvent DCONTEXT_Join;
 26: PETSC_INTERN PetscLogEvent DCONTEXT_Sync;
 27: PETSC_INTERN PetscLogEvent DCONTEXT_Mark;

 29: /* type cast macros: static_cast<> when compiled as C++ (for some additional type-safety), plain C-style casts otherwise */
 30: #if defined(__cplusplus)
 31:   #define PetscStreamTypeCast(...)     static_cast<PetscStreamType>(__VA_ARGS__)
 32:   #define PetscDeviceTypeCast(...)     static_cast<PetscDeviceType>(__VA_ARGS__)
 33:   #define PetscDeviceInitTypeCast(...) static_cast<PetscDeviceInitType>(__VA_ARGS__)
 34: #else
 35:   #define PetscStreamTypeCast(...)     ((PetscStreamType)(__VA_ARGS__))
 36:   #define PetscDeviceTypeCast(...)     ((PetscDeviceType)(__VA_ARGS__))
 37:   #define PetscDeviceInitTypeCast(...) ((PetscDeviceInitType)(__VA_ARGS__))
 38: #endif

 40: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 41: template <typename T>
 43: template <typename T, typename U>
 44: extern void PetscCheckCompatibleDeviceTypes(T, int, U, int);
 45: template <typename T>
 47: template <typename T>
 49: template <typename T, typename U>
 50: extern void PetscCheckCompatibleDevices(T, int, U, int);
 51: template <typename T>
 53: template <typename T>
 55: template <typename T, typename U>
 56: extern void PetscCheckCompatibleDeviceContexts(T, int, U, int);
 57: #elif PetscDefined(DEVICELANGUAGE_CXX) && (PetscDefined(USE_DEBUG) || PetscDefined(DEVICE_KEEP_ERROR_CHECKING_MACROS))
 59:     do { \
 60:       PetscDeviceType pvdt_dtype_ = PetscDeviceTypeCast(dtype); \
 61:       int             pvdt_argno_ = (int)(argno); \
 62:       PetscCheck(((int)pvdt_dtype_ >= (int)PETSC_DEVICE_HOST) && ((int)pvdt_dtype_ <= (int)PETSC_DEVICE_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceType '%d': Argument #%d", pvdt_dtype_, pvdt_argno_); \
 63:       if (PetscUnlikely(!PetscDeviceConfiguredFor_Internal(pvdt_dtype_))) { \
 64:         PetscCheck((int)pvdt_dtype_ != (int)PETSC_DEVICE_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceType '%s': Argument #%d", PetscDeviceTypes[pvdt_dtype_], pvdt_argno_); \
 65:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, \
 66:                 "Not configured for PetscDeviceType '%s': Argument #%d;" \
 67:                 " run configure --help %s for available options", \
 68:                 PetscDeviceTypes[pvdt_dtype_], pvdt_argno_, PetscDeviceTypes[pvdt_dtype_]); \
 69:       } \
 70:     } while (0)

 72:   #define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2) \
 73:     do { \
 74:       PetscDeviceType pccdt_dtype1_ = PetscDeviceTypeCast(dtype1); \
 75:       PetscDeviceType pccdt_dtype2_ = PetscDeviceTypeCast(dtype2); \
 78:       PetscCheck(pccdt_dtype1_ == pccdt_dtype2_, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "PetscDeviceTypes are incompatible: Arguments #%d and #%d. Expected PetscDeviceType '%s' but have '%s' instead", argno1, argno2, PetscDeviceTypes[pccdt_dtype1_], PetscDeviceTypes[pccdt_dtype2_]); \
 79:     } while (0)

 82:     do { \
 83:       PetscDevice pvd_dev_   = dev; \
 84:       int         pvd_argno_ = (int)(argno); \
 85:       PetscAssertPointer(pvd_dev_, pvd_argno_); \
 87:       PetscCheck(pvd_dev_->id >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; id %" PetscInt_FMT " < 0", pvd_argno_, pvd_dev_->id); \
 88:       PetscCheck(pvd_dev_->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; negative reference count %" PetscInt_FMT, pvd_argno_, pvd_dev_->refcnt); \
 89:     } while (0)

 92:     do { \
 93:       PetscDeviceAttribute pvda_attr_  = (dattr); \
 94:       int                  pvda_argno_ = (int)(argno); \
 95:       PetscCheck((((int)pvda_attr_) >= 0) && (pvda_attr_ <= PETSC_DEVICE_ATTR_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceAttribute '%d': Argument #%d", (int)pvda_attr_, pvda_argno_); \
 96:       PetscCheck(pvda_attr_ != PETSC_DEVICE_ATTR_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceAttribute '%s': Argument #%d", PetscDeviceAttributes[pvda_attr_], pvda_argno_); \
 97:     } while (0)

 99:   /*
100:   for now just checks strict equality, but this can be changed as some devices (i.e. kokkos and
101:   any cupm should be compatible once implemented)
102: */
103:   #define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2) \
104:     do { \
105:       PetscDevice pccd_dev1_ = (dev1), pccd_dev2_ = (dev2); \
106:       int         pccd_argno1_ = (int)(argno1), pccd_argno2_ = (int)(argno2); \
109:       PetscCheckCompatibleDeviceTypes(pccd_dev1_->type, pccd_argno1_, pccd_dev2_->type, pccd_argno2_); \
110:     } while (0)

113:     do { \
114:       PetscStreamType pvst_stype_ = PetscStreamTypeCast(stype); \
115:       int             pvst_argno_ = (int)(argno); \
116:       PetscCheck(((int)pvst_stype_ >= 0) && ((int)pvst_stype_ <= (int)PETSC_STREAM_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscStreamType '%d': Argument #%d", pvst_stype_, pvst_argno_); \
117:       PetscCheck((int)pvst_stype_ != (int)PETSC_STREAM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscStreamType '%s': Argument #%d", PetscStreamTypes[pvst_stype_], pvst_argno_); \
118:     } while (0)

121:     do { \
122:       PetscDeviceContext pvdc_dctx_  = dctx; \
123:       int                pvdc_argno_ = (int)(argno); \
126:       if (pvdc_dctx_->device) { \
128:       } else { \
129:         PetscCheck(!pvdc_dctx_->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, \
130:                    "Invalid PetscDeviceContext: Argument #%d; " \
131:                    "PetscDeviceContext is setup but has no PetscDevice", \
132:                    pvdc_argno_); \
133:       } \
134:       PetscCheck(((PetscObject)pvdc_dctx_)->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDeviceContext: Argument #%d; id %" PetscInt64_FMT " < 1", pvdc_argno_, ((PetscObject)pvdc_dctx_)->id); \
135:       PetscCheck(pvdc_dctx_->numChildren <= pvdc_dctx_->maxNumChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Invalid PetscDeviceContext: Argument #%d; number of children %" PetscInt_FMT " > max number of children %" PetscInt_FMT, pvdc_argno_, \
136:                  pvdc_dctx_->numChildren, pvdc_dctx_->maxNumChildren); \
137:     } while (0)

139:   #define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
140:     do { \
141:       PetscDeviceContext pccdc_dctx1_ = (dctx1), pccdc_dctx2_ = (dctx2); \
142:       int                pccdc_argno1_ = (int)(argno1), pccdc_argno2_ = (int)(argno2); \
145:       if (pccdc_dctx1_->device && pccdc_dctx2_->device) PetscCheckCompatibleDevices(pccdc_dctx1_->device, pccdc_argno1_, pccdc_dctx2_->device, pccdc_argno2_); \
146:     } while (0)
147: #else /* PetscDefined(USE_DEBUG) */
149:   #define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2)
152:   #define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2)
155:   #define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
156:     do { \
157:     } while (0)
158: #endif /* PetscDefined(USE_DEBUG) */

160: /* if someone is ready to rock with more than 128 GPUs on hand then we're in real trouble */
161: #define PETSC_DEVICE_MAX_DEVICES 128

163: /*
164:   the configure-time default device type, used as the initial value of
165:   PETSC_DEVICE_DEFAULT() as well as what it is restored to during PetscFinalize()
166: */
167: #if PetscDefined(HAVE_HIP)
168:   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HIP
169: #elif PetscDefined(HAVE_CUDA)
170:   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_CUDA
171: #elif PetscDefined(HAVE_SYCL)
172:   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_SYCL
173: #else
174:   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HOST
175: #endif

177: #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE_TYPE PETSC_DEVICE_HARDWARE_DEFAULT_TYPE
178: // REMOVE ME (change)
179: #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE PETSC_STREAM_DEFAULT

181: typedef struct _DeviceOps *DeviceOps; // pointer-to-table handle type
182: struct _DeviceOps { // function-pointer table; struct _n_PetscDevice embeds one as its 'ops' member
183:   /* the creation routine for the corresponding PetscDeviceContext, this is NOT intended
184:    * to be called by the PetscDevice itself */
185:   PetscErrorCode (*createcontext)(PetscDeviceContext);
186:   PetscErrorCode (*configure)(PetscDevice); // NOTE(review): call site is not visible in this header
187:   PetscErrorCode (*view)(PetscDevice, PetscViewer); // receives the device and a PetscViewer
188:   PetscErrorCode (*getattribute)(PetscDevice, PetscDeviceAttribute, void *); // keyed by PetscDeviceAttribute; the void * presumably receives the value -- TODO confirm
189: };

191: struct _n_PetscDevice { // fields reached through a PetscDevice handle (e.g. device->refcnt, device->id elsewhere in this header)
192:   struct _DeviceOps ops[1];   // function table stored inline as a one-element array
193:   void             *data;     /* placeholder */
194:   PetscInt          refcnt;   /* reference count for the device; adjusted by PetscDeviceReference_Internal() and PetscDeviceDereference_Internal() */
195:   PetscInt          id;       /* unique id per created PetscDevice */
196:   PetscInt          deviceId; /* the id of the underlying device, i.e. the return of
197:                                * cudaGetDevice() for example */
198:   PetscDeviceType   type;     /* type of device */
199: };

201: typedef struct _n_PetscEvent *PetscEvent; // handle type: pointer to the struct below
202: struct _n_PetscEvent {
203:   PetscDeviceType  dtype;      // this cannot change for the lifetime of the event
204:   PetscObjectId    dctx_id;    // id of last dctx to record this event
205:   PetscObjectState dctx_state; // state of last dctx to record this event
206:   void            *data;       // event handle
207:   PetscErrorCode (*destroy)(PetscEvent); // destructor callback taking the event itself; who installs and invokes it is not visible in this header
208: };

210: typedef struct _DeviceContextOps *DeviceContextOps;
211: struct _DeviceContextOps { // method table handed to PETSCHEADER() in struct _p_PetscDeviceContext
212:   PetscErrorCode (*destroy)(PetscDeviceContext);
213:   PetscErrorCode (*changestreamtype)(PetscDeviceContext, PetscStreamType);
214:   PetscErrorCode (*setup)(PetscDeviceContext);
215:   PetscErrorCode (*query)(PetscDeviceContext, PetscBool *);
216:   PetscErrorCode (*waitforcontext)(PetscDeviceContext, PetscDeviceContext);
217:   PetscErrorCode (*synchronize)(PetscDeviceContext);
218:   PetscErrorCode (*getblashandle)(PetscDeviceContext, void *); // dispatched by PetscDeviceContextGetBLASHandle_Internal()
219:   PetscErrorCode (*getsolverhandle)(PetscDeviceContext, void *); // dispatched by PetscDeviceContextGetSOLVERHandle_Internal()
220:   PetscErrorCode (*getstreamhandle)(PetscDeviceContext, void **); // dispatched by PetscDeviceContextGetStreamHandle_Internal()
221:   PetscErrorCode (*begintimer)(PetscDeviceContext); // dispatched by PetscDeviceContextBeginTimer_Internal()
222:   PetscErrorCode (*endtimer)(PetscDeviceContext, PetscLogDouble *); // dispatched by PetscDeviceContextEndTimer_Internal()
223:   PetscErrorCode (*getpower)(PetscDeviceContext, PetscLogDouble *); // dispatched by PetscDeviceContextGetPower_Internal(), which is only compiled when HAVE_CUDA_VERSION_12_2PLUS is defined
224:   PetscErrorCode (*beginenergymeter)(PetscDeviceContext); // dispatched by PetscDeviceContextBeginEnergyMeter_Internal()
225:   PetscErrorCode (*endenergymeter)(PetscDeviceContext, PetscLogDouble *); // dispatched by PetscDeviceContextEndEnergyMeter_Internal()
226:   PetscErrorCode (*memalloc)(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **);                             // optional
227:   PetscErrorCode (*memfree)(PetscDeviceContext, PetscMemType, void **);                                                         // optional
228:   PetscErrorCode (*memcopy)(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t, PetscDeviceCopyMode); // optional
229:   PetscErrorCode (*memset)(PetscDeviceContext, PetscMemType, void *, PetscInt, size_t);                                         // optional
230:   PetscErrorCode (*createevent)(PetscDeviceContext, PetscEvent);                                                                // optional
231:   PetscErrorCode (*recordevent)(PetscDeviceContext, PetscEvent);                                                                // optional
232:   PetscErrorCode (*waitforevent)(PetscDeviceContext, PetscEvent);                                                               // optional
233: };

235: struct _p_PetscDeviceContext {
236:   PETSCHEADER(struct _DeviceContextOps); // object header, parameterized on the _DeviceContextOps method table
237:   PetscDevice     device;         /* the device this context stems from */
238:   void           *data;           /* solver contexts, event, stream */
239:   PetscObjectId  *childIDs;       /* array containing ids of contexts currently forked from this one */
240:   PetscInt        numChildren;    /* how many children does this context expect to destroy */
241:   PetscInt        maxNumChildren; /* how many children can this context have room for without realloc'ing */
242:   PetscStreamType streamType;     /* how should this contexts stream behave around other streams? */
243:   PetscBool       setup; // the context validity check in this header rejects a context whose 'setup' is set while 'device' is NULL
244:   PetscBool       usersetdevice; // NOTE(review): presumably records that the device was chosen explicitly by the user -- confirm
245: };

247: // ===================================================================================
248: //                            PetscDevice Internal Functions
249: // ===================================================================================
250: PETSC_INTERN PetscErrorCode                PetscDeviceInitializeFromOptions_Internal(MPI_Comm);
251: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType, PetscDevice *);

253: static inline PetscErrorCode PetscDeviceReference_Internal(PetscDevice device) // take one reference on 'device'
254: {
255:   PetscFunctionBegin;
256:   if (PetscDefined(DEVICELANGUAGE_CXX)) ++device->refcnt; // count is only touched when DEVICELANGUAGE_CXX is defined; 'device' is not NULL-checked
257:   PetscFunctionReturn(PETSC_SUCCESS); // no failure path in the visible code
258: }

260: #if PetscDefined(DEVICELANGUAGE_CXX)
261: static inline PetscErrorCode PetscDeviceDereference_Internal(PetscDevice device) // drop one reference on 'device'
262: {
263:   PetscFunctionBegin;
264:   --device->refcnt; // decrement happens before the sanity check below; nothing here releases the device when the count reaches zero
265:   PetscAssert(device->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "PetscDevice has negative reference count %" PetscInt_FMT, device->refcnt); // a negative count is reported as PETSC_ERR_ARG_CORRUPT
266:   PetscFunctionReturn(PETSC_SUCCESS);
267: }

269: static inline PetscErrorCode PetscDeviceCheckDeviceCount_Internal(PetscInt count) // asserts that 'count' is below PETSC_DEVICE_MAX_DEVICES
270: {
271:   PetscFunctionBegin;
272:   PetscAssert(count < PETSC_DEVICE_MAX_DEVICES, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Detected %" PetscInt_FMT " devices, which is larger than maximum supported number of devices %d", count, PETSC_DEVICE_MAX_DEVICES); // NOTE(review): strict '<' also rejects count == PETSC_DEVICE_MAX_DEVICES although the message says "larger than" -- confirm the intended bound
273:   PetscFunctionReturn(PETSC_SUCCESS);
274: }
275: #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDevice Internal Functions */

277: /* Convenience form of PetscDeviceGetDefaultForType_Internal(): calls it with the
278:  * automatically selected default PetscDeviceType, PETSC_DEVICE_DEFAULT() */
279: #define PetscDeviceGetDefault_Internal(device) PetscDeviceGetDefaultForType_Internal(PETSC_DEVICE_DEFAULT(), device)

281: static inline PETSC_CONSTEXPR_14 PetscBool PetscDeviceConfiguredFor_Internal(PetscDeviceType type) // PETSC_TRUE when this build is configured for 'type', PETSC_FALSE otherwise
282: {
283:   switch (type) {
284:   case PETSC_DEVICE_HOST:
285:     return PETSC_TRUE; // host is unconditionally available
286:     /* casts are needed in C++ */
287:   case PETSC_DEVICE_CUDA:
288:     return (PetscBool)PetscDefined(HAVE_CUDA);
289:   case PETSC_DEVICE_HIP:
290:     return (PetscBool)PetscDefined(HAVE_HIP);
291:   case PETSC_DEVICE_SYCL:
292:     return (PetscBool)PetscDefined(HAVE_SYCL);
293:   case PETSC_DEVICE_MAX:
294:     return PETSC_FALSE; // the MAX enumerator is always reported as not configured
295:     /* Do not add default case! Will make compiler warn on new additions to PetscDeviceType! */
296:   }
297:   PetscUnreachable(); // only reached if 'type' matches none of the cases above
298:   return PETSC_FALSE;
299: }

301: // ===================================================================================
302: //                     PetscDeviceContext Internal Functions
303: // ===================================================================================
304: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *);
305: #if PetscDefined(DEVICELANGUAGE_CXX)
306: static inline PetscErrorCode PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx, void *handle) // forwards to the 'getblashandle' method of dctx
307: {
308:   PetscFunctionBegin;
309:   /* we do error checking here as this routine is an entry-point; NOTE(review): no check is visible between this comment and the dispatch below (listing line 310 is absent) -- confirm against the full file */
311:   PetscUseTypeMethod(dctx, getblashandle, handle); // 'handle' is passed through as given
312:   PetscFunctionReturn(PETSC_SUCCESS);
313: }

315: static inline PetscErrorCode PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx, void *handle) // forwards to the 'getsolverhandle' method of dctx
316: {
317:   PetscFunctionBegin;
318:   /* we do error checking here as this routine is an entry-point; NOTE(review): no check is visible between this comment and the dispatch below (listing line 319 is absent) -- confirm against the full file */
320:   PetscUseTypeMethod(dctx, getsolverhandle, handle); // 'handle' is passed through as given
321:   PetscFunctionReturn(PETSC_SUCCESS);
322: }

324: static inline PetscErrorCode PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx, void **handle) // forwards to the 'getstreamhandle' method of dctx
325: {
326:   PetscFunctionBegin;
327:   /* we do error checking here as this routine is an entry-point; NOTE(review): listing line 328 is absent, so only the pointer check below is visible -- confirm against the full file */
329:   PetscAssertPointer(handle, 2); // 'handle' is argument #2 and must be a valid pointer
330:   PetscUseTypeMethod(dctx, getstreamhandle, handle);
331:   PetscFunctionReturn(PETSC_SUCCESS);
332: }

334: static inline PetscErrorCode PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx) // forwards to the 'begintimer' method of dctx
335: {
336:   PetscFunctionBegin;
337:   /* we do error checking here as this routine is an entry-point; NOTE(review): no check is visible between this comment and the dispatch below (listing line 338 is absent) -- confirm against the full file */
339:   PetscUseTypeMethod(dctx, begintimer);
340:   PetscFunctionReturn(PETSC_SUCCESS);
341: }

343: static inline PetscErrorCode PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx, PetscLogDouble *elapsed) // forwards to the 'endtimer' method of dctx, passing 'elapsed' along
344: {
345:   PetscFunctionBegin;
346:   /* we do error checking here as this routine is an entry-point; NOTE(review): listing line 347 is absent, so only the pointer check below is visible -- confirm against the full file */
348:   PetscAssertPointer(elapsed, 2); // 'elapsed' is argument #2 and must be a valid pointer
349:   PetscUseTypeMethod(dctx, endtimer, elapsed);
350:   PetscFunctionReturn(PETSC_SUCCESS);
351: }

353:   #if PetscDefined(HAVE_CUDA_VERSION_12_2PLUS)
354: static inline PetscErrorCode PetscDeviceContextGetPower_Internal(PetscDeviceContext dctx, PetscLogDouble *power) // forwards to the 'getpower' method of dctx; only compiled when HAVE_CUDA_VERSION_12_2PLUS is defined
355: {
356:   PetscFunctionBegin; // NOTE(review): listing line 357 is absent; any statement there is not shown
358:   PetscAssertPointer(power, 2); // 'power' is argument #2 and must be a valid pointer
359:   PetscUseTypeMethod(dctx, getpower, power);
360:   PetscFunctionReturn(PETSC_SUCCESS);
361: }
362:   #endif

364: static inline PetscErrorCode PetscDeviceContextBeginEnergyMeter_Internal(PetscDeviceContext dctx) // forwards to the 'beginenergymeter' method of dctx
365: {
366:   PetscFunctionBegin;
367:   /* we do error checking here as this routine is an entry-point; NOTE(review): no check is visible between this comment and the dispatch below (listing line 368 is absent) -- confirm against the full file */
369:   PetscUseTypeMethod(dctx, beginenergymeter);
370:   PetscFunctionReturn(PETSC_SUCCESS);
371: }

373: static inline PetscErrorCode PetscDeviceContextEndEnergyMeter_Internal(PetscDeviceContext dctx, PetscLogDouble *energy) // forwards to the 'endenergymeter' method of dctx, passing 'energy' along
374: {
375:   PetscFunctionBegin;
376:   /* we do error checking here as this routine is an entry-point; NOTE(review): listing line 377 is absent, so only the pointer check below is visible -- confirm against the full file */
378:   PetscAssertPointer(energy, 2); // 'energy' is argument #2 and must be a valid pointer
379:   PetscUseTypeMethod(dctx, endenergymeter, energy);
380:   PetscFunctionReturn(PETSC_SUCCESS);
381: }
382: #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDeviceContext Internal Functions */

384: /* Fetches the current PetscDeviceContext into *dctx and compares its device type against 'type'; note, only does assertion checking in debug mode */
385: static inline PetscErrorCode PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext *dctx, PetscDeviceType type)
386: {
387:   PetscFunctionBegin;
388:   PetscCall(PetscDeviceContextGetCurrentContext(dctx)); // runs regardless of build mode
389:   if (PetscDefined(USE_DEBUG)) {
390:     PetscDeviceType dtype;

393:     PetscCall(PetscDeviceContextGetDeviceType(*dctx, &dtype)); // NOTE(review): listing line 392 is absent; any statement there is not shown
394:     PetscCheckCompatibleDeviceTypes(dtype, 1, type, 2); // 1 and 2 are the argument numbers reported in the error text; this macro expands to nothing in the header's non-checking branch
395:   } else (void)type; // marks 'type' as used when the debug branch is not taken
396:   PetscFunctionReturn(PETSC_SUCCESS);
397: }

399: static inline PetscErrorCode PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext *dctx) // in/out: a NULL *dctx is replaced via PetscDeviceContextGetNullContext_Internal()
400: {
401:   PetscFunctionBegin;
402:   PetscAssertPointer(dctx, 1); // the pointer itself must be valid; only *dctx may be NULL
403:   if (!*dctx) PetscCall(PetscDeviceContextGetNullContext_Internal(dctx)); // a non-NULL *dctx is not reassigned here
405:   PetscFunctionReturn(PETSC_SUCCESS); // NOTE(review): listing line 404 is absent; any statement there is not shown
406: }

408: /* Experimental API -- it will eventually become public */
409: PETSC_EXTERN PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT, PetscMemType, size_t);
410: PETSC_EXTERN PetscErrorCode PetscDeviceGetAttribute(PetscDevice, PetscDeviceAttribute, void *);
411: #if PetscDefined(DEVICELANGUAGE_CXX)
412: PETSC_EXTERN PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext, PetscObjectId, PetscMemoryAccessMode, const char name[]);
413: #endif
414: // Used for testing purposes, internal use ONLY
415: PETSC_EXTERN PetscErrorCode PetscGetMarkedObjectMap_Internal(size_t *, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
416: PETSC_EXTERN PetscErrorCode PetscRestoreMarkedObjectMap_Internal(size_t, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
417: #if PetscDefined(DEVICELANGUAGE_CXX) && defined(__cplusplus)
418: namespace
419: {

421: inline PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObject obj, PetscMemoryAccessMode mode, const char name[]) // C++-only overload: accepts the PetscObject instead of its id
422: {
423:   PetscFunctionBegin;
424:   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, obj->id, mode, name)); // forwards obj->id to the PetscObjectId version declared earlier in this header; 'obj' is not NULL-checked
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }

428: } // anonymous namespace
429: #endif

431: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HOST(PetscDeviceContext);
432: #if PetscDefined(HAVE_CUDA)
433: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_CUDA(PetscDeviceContext);
434: #endif
435: #if PetscDefined(HAVE_HIP)
436: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HIP(PetscDeviceContext);
437: #endif
438: #if PetscDefined(HAVE_SYCL)
439: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext);
440: #endif

442: static inline PetscErrorCode PetscDeviceContextSynchronizeIfWithBarrier_Internal(PetscDeviceContext dctx) // synchronizes 'dctx' only when its stream type is one of the two *_WITH_BARRIER values
443: {
444:   PetscStreamType stream_type;

446:   PetscFunctionBegin;
447:   PetscCall(PetscDeviceContextGetStreamType(dctx, &stream_type));
448:   if (stream_type == PETSC_STREAM_DEFAULT_WITH_BARRIER || stream_type == PETSC_STREAM_NONBLOCKING_WITH_BARRIER) PetscCall(PetscDeviceContextSynchronize(dctx)); // any other stream type returns without synchronizing
449:   PetscFunctionReturn(PETSC_SUCCESS);
450: }

452: #if PetscDefined(HAVE_CUDA)

454:   #define PetscCallNVML(...) /* evaluate an NVML call and raise PETSC_ERR_LIB unless it yields NVML_SUCCESS */ \
455:     do { \
456:       nvmlReturn_t nvmlerr = __VA_ARGS__; /* the wrapped expression is evaluated exactly once */ \
457:       PetscCheck(nvmlerr == NVML_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s, error string: %s", __func__, nvmlErrorString(nvmlerr)); /* __func__ names the function the macro is expanded in */ \
458:     } while (0)

460: #endif