Actual source code: pcbjkokkosimpl.h

  1: #pragma once

  3: #include <petscvec_kokkos.hpp>
  4: #include <petsc/private/pcimpl.h>
  5: #include <petsc/private/deviceimpl.h>
  6: #include <petsc/private/kspimpl.h>

  8: #include "Kokkos_Core.hpp"

 10: #if defined(PETSC_HAVE_CUDA)
 11:   #if PETSC_PKG_CUDA_VERSION_GE(10, 0, 0)
 12:     #include <nvtx3/nvToolsExt.h>
 13:   #else
 14:     #include <nvToolsExt.h>
 15:   #endif
 16: #endif

 18: #define PCBJKOKKOS_SHARED_LEVEL 1 // 0 is shared, 1 is global
 19: #define PCBJKOKKOS_VEC_SIZE     16 // NOTE(review): presumably the Kokkos vector length used by the batched kernels -- confirm at the use site
 20: #define PCBJKOKKOS_TEAM_SIZE    16 // NOTE(review): presumably the Kokkos team size used by the batched kernels -- confirm at the use site

 22: #define PCBJKOKKOS_VERBOSE_LEVEL 1 // compile-time verbosity setting; what each level prints is not visible in this header

 24: typedef enum { // Integer tag identifying a batched solver; values are implicit, starting at 0, in declaration order
 25:   BATCH_KSP_BICG_IDX,    // NOTE(review): presumably the batched BiCG solver -- confirm against the dispatch code
 26:   BATCH_KSP_TFQMR_IDX,   // NOTE(review): presumably the batched TFQMR solver -- confirm
 27:   BATCH_KSP_GMRESKK_IDX, // NOTE(review): presumably GMRES provided by Kokkos Kernels -- confirm
 28:   BATCH_KSP_PREONLY_IDX, // NOTE(review): presumably "apply the preconditioner only" -- confirm
 29:   NUM_BATCH_TYPES        // sentinel: equals the number of enumerators above; must stay last
 30: } KSPIndex;

 32: typedef Kokkos::DefaultExecutionSpace exec_space; // the view aliases below are parameterized on Kokkos' default execution space
 33: using layout           = Kokkos::LayoutRight; // layout tag shared by the rank-2 view aliases below
 34: using IntView          = Kokkos::View<PetscInt **, layout, exec_space>; // rank-2 view of PetscInt
 35: using AMatrixValueView = const Kokkos::View<PetscScalar **, layout, exec_space>; // rank-2 PetscScalar view; the const qualifies the View object, the element type stays non-const PetscScalar
 36: using XYType           = const Kokkos::View<PetscScalar **, layout, exec_space>; // same type as AMatrixValueView under a different name

 38: typedef struct { // State record named PC_PCBJKOKKOS; NOTE(review): presumably the private PC context of the PCBJKOKKOS preconditioner -- confirm in the .kokkos.cxx source
 39:   Vec                                               vec_diag; // NOTE(review): purpose not visible here; presumably holds (inverse) diagonal data -- confirm
 40:   PetscInt                                          nBlocks; /* total number of blocks */
 41:   PetscInt                                          n;       // cache host version of d_bid_eqOffset_k[nBlocks]
 42:   KSP                                               ksp;     // Used just for options. Should have one for each block
 43:   Kokkos::View<PetscInt *, Kokkos::LayoutRight>    *d_bid_eqOffset_k; // pointer to a rank-1 PetscInt view; indexed up to [nBlocks] per the comment on n, so presumably nBlocks+1 equation offsets -- confirm
 44:   Kokkos::View<PetscScalar *, Kokkos::LayoutRight> *d_idiag_k; // pointer to a rank-1 PetscScalar view; NOTE(review): presumably the inverse diagonal -- confirm
 45:   Kokkos::View<PetscInt *>                         *d_isrow_k; // pointer to a rank-1 PetscInt view; NOTE(review): presumably row ordering indices -- confirm
 46:   Kokkos::View<PetscInt *>                         *d_isicol_k; // pointer to a rank-1 PetscInt view; NOTE(review): presumably inverse column ordering indices -- confirm
 47:   KSPIndex                                          ksp_type_idx; // one of the KSPIndex tags declared above
 48:   PetscInt                                          nwork; // NOTE(review): presumably the number of work vectors the chosen solver needs -- confirm
 49:   PetscInt                                          const_block_size; // used to decide to use shared memory for work vectors
 50:   PetscInt                                         *dm_Nf;            // Number of fields in each DM
 51:   PetscInt                                          num_dms; // NOTE(review): presumably the length of dm_Nf -- confirm
 52:   // diagnostics
 53:   PetscBool reason; // NOTE(review): presumably enables reporting of converged reasons -- confirm
 54:   PetscBool monitor; // NOTE(review): presumably enables residual monitoring -- confirm
 55:   PetscInt  batch_target; // NOTE(review): presumably the batch index singled out for diagnostics -- confirm
 56:   PetscInt  rank_target; // NOTE(review): presumably the MPI rank singled out for diagnostics -- confirm
 57:   PetscInt  nsolves_team; // NOTE(review): presumably the number of solves handled per Kokkos team -- confirm
 58:   PetscInt  max_nits; // NOTE(review): presumably the largest iteration count observed -- confirm
 59:   // caches
 60:   IntView          *rowOffsets; // pointer to a rank-2 PetscInt view (IntView)
 61:   IntView          *colIndices; // pointer to a rank-2 PetscInt view (IntView)
 62:   XYType           *batch_b; // pointer to a const-qualified rank-2 PetscScalar view (XYType)
 63:   XYType           *batch_x; // pointer to a const-qualified rank-2 PetscScalar view (XYType)
 64:   AMatrixValueView *batch_values; // pointer to a const-qualified rank-2 PetscScalar view (AMatrixValueView)
 65: } PC_PCBJKOKKOS;

 67: typedef Kokkos::TeamPolicy<>::member_type team_member; // team handle type of the default Kokkos::TeamPolicy
 68: #if defined(PETSC_HAVE_KOKKOS_KERNELS_BATCH) // the prototype below is declared only when this configure macro is defined
 69: PETSC_INTERN PetscErrorCode PCApply_BJKOKKOSKERNELS(PC, const PetscScalar *, PetscScalar *, const PetscInt *glb_Aai, const PetscInt *glb_Aaj, const PetscScalar *glb_Aaa, const PetscInt, MatInfo, const PetscInt, PCFailedReason *); // NOTE(review): glb_Aai/glb_Aaj/glb_Aaa are presumably CSR row offsets, column indices and values; roles of the unnamed parameters are not visible here -- confirm in the definition
 70: #endif