Actual source code: kinit.kokkos.cxx

  1: #include <petsc/private/deviceimpl.h>
  2: #include <petsc/private/kokkosimpl.hpp>
  3: #include <petscpkg_version.h>
  4: #include <petsc_kokkos.hpp>
  5: #include <petscdevice_cupm.h>

  7: PetscBool    PetscKokkosInitialized = PETSC_FALSE; // Has Kokkos been initialized (either by PETSc or by users)?
  8: PetscScalar *PetscScalarPool        = nullptr;
  9: PetscInt     PetscScalarPoolSize    = 0;

 11: Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr;

 13: PetscErrorCode PetscKokkosFinalize_Private(void)
 14: {
 15:   PetscFunctionBegin;
 16:   PetscCallCXX(delete PetscKokkosExecutionSpacePtr);
 17:   PetscKokkosExecutionSpacePtr = nullptr;
 18:   PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool));
 19:   PetscScalarPoolSize = 0;
 20:   if (PetscBeganKokkos) {
 21:     PetscCallCXX(Kokkos::finalize());
 22:     PetscBeganKokkos = PETSC_FALSE;
 23:   }
 24:   PetscFunctionReturn(PETSC_SUCCESS);
 25: }

 27: PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized)
 28: {
 29:   PetscFunctionBegin;
 30:   *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE;
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: /*@C
 35:   PetscKokkosInitializeCheck - Initialize Kokkos if it has not already been initialized

 37:   Not Collective

 39:   Level: developer

 41:   Notes:
 42:   Called internally before PETSc invokes any Kokkos-based functionality. The first call initializes
 43:   Kokkos using either the user's command-line options or defaults derived from PETSc's device
 44:   configuration; subsequent calls are no-ops.

 46:   PETSc will call `Kokkos::finalize()` itself during `PetscFinalize()` only if it was the one that
 47:   initialized Kokkos in this process.

 49: .seealso: `PetscInitialize()`, `PetscFinalize()`, `PetscDeviceContext`
 50: @*/
 51: PetscErrorCode PetscKokkosInitializeCheck(void)
 52: {
 53:   PetscFunctionBegin;
 54:   if (!Kokkos::is_initialized()) {
 55: #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
 56:     auto args = Kokkos::InitializationSettings();
 57: #else
 58:     auto args = Kokkos::InitArguments{}; /* use default constructor */
 59: #endif

 61: #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL))
 62:     /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */
 63:     PetscDevice device;
 64:     PetscInt    deviceId;
 65:     PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device));
 66:     PetscCall(PetscDeviceGetDeviceId(device, &deviceId));
 67:     PetscCall(PetscDeviceDestroy(&device));
 68:   #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0)
 69:     // if device_id is not set, and no gpus have been found, kokkos will use CPU
 70:     if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId));
 71:   #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
 72:     args.set_device_id(static_cast<int>(deviceId));
 73:   #else
 74:     PetscCall(PetscMPIIntCast(deviceId, &args.device_id));
 75:   #endif
 76: #endif

 78:     /* To use PetscNumOMPThreads, one has to configure PETSc --with-openmp.
 79:        Otherwise, let's keep the default value (-1) of args.num_threads.
 80:     */
 81: #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP)
 82:   #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
 83:     args.set_num_threads(PetscNumOMPThreads);
 84:   #else
 85:     args.num_threads = PetscNumOMPThreads;
 86:   #endif
 87: #endif
 88:     PetscCallCXX(Kokkos::initialize(args));
 89:     PetscBeganKokkos = PETSC_TRUE;
 90:   }

 92:   if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by PETSc or by user, we need to init PetscKokkosExecutionSpacePtr
 93: #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL))
 94:     PetscDeviceContext dctx;
 95:     PetscDeviceType    dtype;

 97:     PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); // it internally sets PetscDefaultCuda/HipStream
 98:     PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));

100:   #if defined(PETSC_HAVE_CUDA)
101:     if (dtype == PETSC_DEVICE_CUDA) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream));
102:   #elif defined(PETSC_HAVE_HIP)
103:     if (dtype == PETSC_DEVICE_HIP) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream));
104:   #elif defined(PETSC_HAVE_SYCL)
105:     if (dtype == PETSC_DEVICE_SYCL) {
106:       void *handle;
107:       PetscCall(PetscDeviceContextGetStreamHandle(dctx, &handle)); // Kind of PetscDefaultSyclStream
108:       PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(*(sycl::queue *)handle));
109:     }
110:   #endif
111: #else
112:     // In all other cases, we use Kokkos default
113:     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace());
114: #endif
115:   }

117:   if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars
118:     PetscScalarPoolSize = 1024;
119:     PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize)));
120:   }

122:   PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }