Actual source code: petscdevice.h

  1: #pragma once

  3: #include <petscdevicetypes.h>
  4: #include <petscviewertypes.h>

  6: #if PETSC_CPP_VERSION >= 11 // C++11
     // PETSC_DEVICE_ALIGNOF(expr) produces the alignment value that PetscDeviceMalloc() and
     // PetscDeviceCalloc() pass as the alignment argument of PetscDeviceAllocate_Private(). Which
     // definition is active depends on the language standard detected by the preprocessor.
     // C++11 and later: alignment of the exact type of the expression
  7:   #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__))
  8: #elif PETSC_C_VERSION >= 11 // C11
  9:   #ifdef __GNUC__
         // __GNUC__ is defined: __typeof__ turns the expression into a type that _Alignof accepts
 10:     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__))
 11:   #else
 12:     #include <stddef.h> // max_align_t
 13:     // Note we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an
 14:     // expression is a GNU extension", so we just default to max_align_t which is ultra safe
         // (the macro arguments are not used by this definition)
 15:     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t)
 16:   #endif // __GNUC__
 17: #else
       // Neither C++11 nor C11 was detected: the arguments are ignored and PETSC_MEMALIGN (defined
       // outside this header) is used instead
 18:   #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN
 19: #endif

 21: /* MANSEC = Sys */
 22: /* SUBMANSEC = Device */

 24: // REVIEW ME: this should probably go somewhere better, configure-time?
     // Defined unconditionally to 1 by this header (i.e. not produced by configure).
     // NOTE(review): presumably advertises that the host "device" is always available to code
     // testing PetscDefined(HAVE_HOST) -- confirm against the users of this macro.
 25: #define PETSC_HAVE_HOST 1

 27: /* logging support */
     /* Class ids for the two object kinds declared in this header (PetscDevice and */
     /* PetscDeviceContext). These are declarations only; the definitions are not in this file. */
 28: PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID;
 29: PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID;

 31: PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); /* NOTE(review): by name, package-level setup; implementation is not in this header */
 32: PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); /* NOTE(review): by name, the teardown counterpart of the routine above */
 33: PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); /* takes a read-only pointer and a PetscMemType output; presumably reports where the pointer lives -- confirm */

 35: /* PetscDevice */
     /*
       When PetscDefined(HAVE_CXX) is true the PetscDevice entry points are declared as real
       functions. Otherwise each entry point is replaced by a function-like macro that performs
       no device work and evaluates to PETSC_SUCCESS (or, for PetscDeviceInitialized(), to a
       comparison result).
     */
 36: #if PetscDefined(HAVE_CXX)
 37: PETSC_EXTERN PetscErrorCode  PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *);
 38: PETSC_EXTERN PetscErrorCode  PetscDeviceDestroy(PetscDevice *);
 39: PETSC_EXTERN PetscErrorCode  PetscDeviceConfigure(PetscDevice);
 40: PETSC_EXTERN PetscErrorCode  PetscDeviceView(PetscDevice, PetscViewer);
 41: PETSC_EXTERN PetscErrorCode  PetscDeviceGetType(PetscDevice, PetscDeviceType *);
 42: PETSC_EXTERN PetscErrorCode  PetscDeviceGetDeviceId(PetscDevice, PetscInt *);
 43: PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void);
 44: PETSC_EXTERN PetscErrorCode  PetscDeviceSetDefaultDeviceType(PetscDeviceType);
 45: PETSC_EXTERN PetscErrorCode  PetscDeviceInitialize(PetscDeviceType);
 46: PETSC_EXTERN PetscBool       PetscDeviceInitialized(PetscDeviceType);
 47: #else
       /*
         Fallback macros. Parameters spelled like type names (PetscDeviceType, PetscInt,
         PetscDevice, PetscViewer) are placeholders that never appear in the expansion, so the
         corresponding call-site arguments are not evaluated. Lower-case parameters are used:
         output pointers are written through (PETSC_NULLPTR, the default device type, or 0)
         before the comma expression yields PETSC_SUCCESS.
       */
 48:   #define PetscDeviceCreate(PetscDeviceType, PetscInt, dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS)
 49:   #define PetscDeviceDestroy(dev)                           (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS)
 50:   #define PetscDeviceConfigure(PetscDevice)                 PETSC_SUCCESS
 51:   #define PetscDeviceView(PetscDevice, PetscViewer)         PETSC_SUCCESS
       /* PETSC_DEVICE_DEFAULT() is defined two lines further down; macros are expanded at the point of use */
 52:   #define PetscDeviceGetType(PetscDevice, type)             (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS)
 53:   #define PetscDeviceGetDeviceId(PetscDevice, id)           (*(id) = 0, PETSC_SUCCESS)
 54:   #define PETSC_DEVICE_DEFAULT()                            PETSC_DEVICE_HOST
 55:   #define PetscDeviceSetDefaultDeviceType(PetscDeviceType)  PETSC_SUCCESS
 56:   #define PetscDeviceInitialize(PetscDeviceType)            PETSC_SUCCESS
       /* only PETSC_DEVICE_HOST compares equal, every other device type yields false */
 57:   #define PetscDeviceInitialized(dtype)                     ((dtype) == PETSC_DEVICE_HOST)
 58: #endif /* PetscDefined(HAVE_CXX) */

 60: /* PetscDeviceContext */
     /*
       When PetscDefined(HAVE_CXX) is true the PetscDeviceContext entry points are declared as
       real functions. Otherwise each one is replaced by a function-like macro that does no
       device work and evaluates to PETSC_SUCCESS.
     */
 61: #if PetscDefined(HAVE_CXX)
 62: PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *);
 63: PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *);
 64: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType);
 65: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *);
 66: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice);
 67: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *);
 68: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *);
 69: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
 70: PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *);
 71: PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *);
 72: PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext);
 73: PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **);
 74: PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **);
 75: PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **);
 76: PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
 77: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext);
 78: PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer);
 79: PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char[]);
 80: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *);
 81: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
 82: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **);
 83: #else
       /*
         Fallback macros. Parameters that do not appear in an expansion are placeholders; the
         corresponding call-site arguments are not evaluated. Output arguments are written
         through exactly once: object/handle outputs receive PETSC_NULLPTR, the stream type
         output receives PETSC_STREAM_DEFAULT, the device type output receives
         PETSC_DEVICE_DEFAULT(), and the idle flag receives PETSC_TRUE.
       */
 84:   #define PetscDeviceContextCreate(dctx)                                                                            (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
 85:   #define PetscDeviceContextDestroy(dctx)                                                                           (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
 86:   #define PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType)                                      PETSC_SUCCESS
 87:   #define PetscDeviceContextGetStreamType(PetscDeviceContext, type)                                                 (*(type) = PETSC_STREAM_DEFAULT, PETSC_SUCCESS)
 88:   #define PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice)                                              PETSC_SUCCESS
 89:   #define PetscDeviceContextGetDevice(PetscDeviceContext, device)                                                   (*(device) = PETSC_NULLPTR, PETSC_SUCCESS)
 90:   #define PetscDeviceContextGetDeviceType(PetscDeviceContext, type)                                                 (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS)
 91:   #define PetscDeviceContextSetUp(PetscDeviceContext)                                                               PETSC_SUCCESS
 92:   #define PetscDeviceContextDuplicate(PetscDeviceContextl, PetscDeviceContextr)                                     (*(PetscDeviceContextr) = PETSC_NULLPTR, PETSC_SUCCESS)
 93:   #define PetscDeviceContextQueryIdle(PetscDeviceContext, idle)                                                     (*(idle) = PETSC_TRUE, PETSC_SUCCESS)
 94:   #define PetscDeviceContextWaitForContext(PetscDeviceContextl, PetscDeviceContextr)                                PETSC_SUCCESS
       /* fork/join: only the pointer-to-array argument is touched, it is reset to PETSC_NULLPTR */
 95:   #define PetscDeviceContextForkWithStreamType(PetscDeviceContextp, PetscStreamType, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
 96:   #define PetscDeviceContextFork(PetscDeviceContextp, PetscInt, PetscDeviceContextc)                                (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
 97:   #define PetscDeviceContextJoin(PetscDeviceContextp, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContextc)    (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
 98:   #define PetscDeviceContextSynchronize(PetscDeviceContext)                                                         PETSC_SUCCESS
 99:   #define PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext)                                            PETSC_SUCCESS
100:   #define PetscDeviceContextView(PetscDeviceContext, PetscViewer)                                                   PETSC_SUCCESS
       /* third placeholder mirrors the `const char[]` argument of the prototype, it is not a viewer */
101:   #define PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, name)                                  PETSC_SUCCESS
102:   #define PetscDeviceContextGetCurrentContext(dctx)                                                                 (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
103:   #define PetscDeviceContextSetCurrentContext(PetscDeviceContext)                                                   PETSC_SUCCESS
104:   #define PetscDeviceContextGetStreamHandle(PetscDeviceContext, handle)                                             (*(handle) = PETSC_NULLPTR, PETSC_SUCCESS)
105: #endif /* PetscDefined(HAVE_CXX) */

107: /* memory */
     /*
       Low-level allocation, deallocation, copy and set primitives. The public macros further
       down (PetscDeviceMalloc(), PetscDeviceCalloc(), PetscDeviceFree(), PetscDeviceArrayCopy(),
       PetscDeviceArrayZero()) expand to these.
     */
108: #if PetscDefined(HAVE_CXX)
109: PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT);
110: PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT);
111: PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t);
112: PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t);
113: #else
114:   #include <string.h> // memset()
       // Fallbacks: the PetscDeviceContext argument is never expanded. For allocation the memory
       // type and alignment arguments are dropped as well; only `clear`, `size` and `ptr` reach
       // PetscMallocA().
115:   #define PetscDeviceAllocate_Private(PetscDeviceContext, clear, PetscMemType, size, alignment, ptr) PetscMallocA(1, (clear), __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size), (ptr))
116:   #define PetscDeviceDeallocate_Private(PetscDeviceContext, ptr)                                     PetscFree(ptr)
117:   #define PetscDeviceMemcpy(PetscDeviceContext, dest, src, size)                                     PetscMemcpy((dest), (src), (size))
       // the fill value is converted to unsigned char before memset(); memset()'s return value is
       // discarded and the expression always yields PETSC_SUCCESS
118:   #define PetscDeviceMemset(PetscDeviceContext, ptr, v, size)                                        ((void)memset((ptr), (unsigned char)(v), (size)), PETSC_SUCCESS)
119: #endif /* PetscDefined(HAVE_CXX) */

121: /*MC
122:   PetscDeviceMalloc - Allocate device-aware memory

124:   Synopsis:
125: #include <petscdevice.h>
126:   PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)

128:   Not Collective, Asynchronous, Auto-dependency aware

130:   Input Parameters:
131: + dctx  - The `PetscDeviceContext` used to allocate the memory
132: . mtype - The type of memory to allocate
133: - n     - The amount (in elements) to allocate

135:   Output Parameter:
136: . ptr - The pointer to store the result in

138:   Level: beginner

140:   Notes:
141:   Memory allocated with this function must be freed with `PetscDeviceFree()`.

143:   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.

145:   This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device
146:   support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()`
147:   allocates only host memory.

149:   This routine uses the `sizeof()` of the memory type requested to determine the total memory
150:   to be allocated, therefore you should not multiply the number of elements requested by the
151:   `sizeof()` the type\:

153: .vb
154:   PetscInt *arr;

156:   // correct
157:   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr);

159:   // incorrect
160:   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr);
161: .ve

163:   Note that the result stored in `ptr` is immediately valid and the user may freely inspect or
164:   manipulate its value on function return, i.e.\:

166: .vb
167:   PetscInt *ptr;

169:   PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr);

171:   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize

173:   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
174: .ve

176:   DAG representation:
177: .vb
178:   time ->

180:   -> dctx - |= CALL =| -\- dctx -->
181:                          \- ptr ->
182: .ve

184: .N ASYNC_API

186: .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`,
187: `PetscDeviceArrayZero()`
188: M*/
     // Expansion: the byte count is (size_t)(n) * sizeof(**(ptr)) with no overflow check, the
     // alignment is PETSC_DEVICE_ALIGNOF(**(ptr)), the clear flag is PETSC_FALSE, and `ptr` is cast
     // to void ** before being handed to PetscDeviceAllocate_Private().
189: #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))

191: /*MC
192:   PetscDeviceCalloc - Allocate zeroed device-aware memory

194:   Synopsis:
195: #include <petscdevice.h>
196:   PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)

198:   Not Collective, Asynchronous, Auto-dependency aware

200:   Input Parameters:
201: + dctx  - The `PetscDeviceContext` used to allocate the memory
202: . mtype - The type of memory to allocate
203: - n     - The amount (in elements) to allocate

205:   Output Parameter:
206: . ptr - The pointer to store the result in

208:   Level: beginner

210:   Notes:
211:   Has identical usage to `PetscDeviceMalloc()` except that the memory is zeroed before it is
212:   returned. See `PetscDeviceMalloc()` for further discussion.

214:   This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device
215:   support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()`
216:   allocates only host memory.

218: .N ASYNC_API

220: .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`,
221: `PetscDeviceArrayZero()`
222: M*/
     // Same expansion as PetscDeviceMalloc() except that the clear flag passed to
     // PetscDeviceAllocate_Private() is PETSC_TRUE instead of PETSC_FALSE.
223: #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))

225: /*MC
226:   PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()`

228:   Synopsis:
229: #include <petscdevice.h>
230:   PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr)

232:   Not Collective, Asynchronous, Auto-dependency aware

234:   Input Parameters:
235: + dctx - The `PetscDeviceContext` used to free the memory
236: - ptr  - The pointer to free, may be `NULL`

238:   Level: beginner

240:   Notes:
241:   `ptr` is set to `PETSC_NULLPTR` on successful deallocation.

243:   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`, not
       `PetscMalloc()` or related routines.

       This is a macro in which `ptr` appears twice and is assigned to, so `ptr` must be a
       modifiable lvalue and should be free of side effects.

245:   This routine falls back to using `PetscFree()` if PETSc was not configured with device
246:   support. The user should note that `PetscFree()` frees only host memory.

248:   DAG representation:
249: .vb
250:   time ->

252:   -> dctx -/- |= CALL =| - dctx ->
253:   -> ptr -/
254: .ve

256: .N ASYNC_API

258: .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`
259: M*/
     // The right-hand side of `||` (resetting ptr) runs only when PetscDeviceDeallocate_Private()
     // evaluates to zero. Because `||` yields 0 or 1, a nonzero result from the deallocation is
     // reported as the value 1 cast to PetscErrorCode rather than as the original code.
260: #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS)))

262: /*MC
263:   PetscDeviceArrayCopy - Copy memory in a device-aware manner

265:   Synopsis:
266: #include <petscdevice.h>
267:   PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n)

269:   Not Collective, Asynchronous, Auto-dependency aware

271:   Input Parameters:
272: + dctx - The `PetscDeviceContext` used to copy the memory
273: . dest - The pointer to copy to
274: . src  - The pointer to copy from
275: - n    - The amount (in elements) to copy

277:   Notes:
278:   Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or
279:   `PetscDeviceCalloc()`.

281:   This uses the `sizeof()` of the type pointed to by `src` to determine the total memory to
282:   be copied, therefore you should not multiply the number of elements by the `sizeof()` the
283:   type\:

285: .vb
286:   PetscInt *to,*from;

288:   // correct
289:   PetscDeviceArrayCopy(dctx,to,from,n);

291:   // incorrect
292:   PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from));
293: .ve

295:   See `PetscDeviceMemcpy()` for further discussion.

297:   Level: beginner

299: .N ASYNC_API

301: .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
302: `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()`
303: M*/
     // Expansion: the byte count is (size_t)(n) * sizeof(*(src)); the pointee type of `dest` is not
     // consulted, and the product is not checked for overflow.
304: #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src)))

306: /*MC
307:   PetscDeviceArrayZero - Zero memory in a device-aware manner

309:   Synopsis:
310: #include <petscdevice.h>
311:   PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n)

313:   Not Collective, Asynchronous, Auto-dependency aware

315:   Input Parameters:
316: + dctx  - The `PetscDeviceContext` used to zero the memory
317: . ptr   - The pointer to the memory
318: - n     - The amount (in elements) to zero

320:   Level: beginner

322:   Notes:
323:   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.

325:   This uses the `sizeof()` of the type pointed to by `ptr` to determine the total memory to be
326:   zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\:

328: .vb
329:   PetscInt *ptr;

331:   // correct
332:   PetscDeviceArrayZero(dctx,ptr,n);

334:   // incorrect
335:   PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr));
336: .ve

338:   See `PetscDeviceMemset()` for further discussion.

340: .N ASYNC_API

342: .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
343: `PetscDeviceArrayCopy()`, `PetscDeviceMemset()`
344: M*/
     // Expansion: PetscDeviceMemset() is called with the literal fill value 0 and a byte count of
     // (size_t)(n) * sizeof(*(ptr)); the product is not checked for overflow.
345: #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr)))