Actual source code: ex302k.kokkos.cxx

static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";

#include <petscvec_kokkos.hpp>
#include <petscdevice.h>
#include <petscmat.h>
#include <petscmat_kokkos.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>

using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;

int main(int argc, char **argv)
{
  Mat             A, B, BT;
  PetscInt        i, j, column, M, N, m, n, m_ab, n_ab;
  PetscInt       *di, *dj, *oi, *oj, nd;
  const PetscInt *garray;
  PetscInt       *garray_h;
  PetscScalar    *oa, *da;
  PetscScalar     value;
  PetscRandom     rctx;
  PetscBool       equal, done;
  Mat             AA, AB;
  PetscMPIInt     size, rank;

  // ~~~~~~~~~~~~~~~~~~~~~
  // This test shows the routines needed to build a Kokkos matrix without
  // preallocating on the host
  // ~~~~~~~~~~~~~~~~~~~~~
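  // The steps are: build the CSR arrays in device Kokkos::Views, wrap them as two
  // SeqAIJKokkos matrices (the diagonal and off-diagonal blocks) with
  // MatCreateSeqAIJKokkosWithKokkosViews(), and combine them into one MPI matrix
  // with MatCreateMPIAIJWithSeqAIJ()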

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  /* Create an MPIAIJ matrix to check against */
  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
  PetscCall(MatSetUp(A));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
  PetscCall(PetscRandomSetFromOptions(rctx));

  for (i = 5 * rank; i < 5 * rank + 5; i++) {
    for (j = 0; j < 5 * size; j++) {
      PetscCall(PetscRandomGetValue(rctx, &value));
      column = (PetscInt)(5 * size * PetscRealPart(value));

      // keep rank 0 free of off-process entries by forcing all of its columns onto the diagonal
      if (rank == 0) column = i;

      PetscCall(PetscRandomGetValue(rctx, &value));
      PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
  PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
  PetscCall(MatSeqAIJGetArray(AA, &da));
  PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
  PetscCall(MatSeqAIJGetArray(AB, &oa));
  PetscCall(MatGetSize(AB, &m_ab, &n_ab));

  Mat output_mat_local, output_mat_nonlocal;
  // Keep the Kokkos views in an inner scope: they are reference counted, and the
  // matrices created below hold their own references to the underlying memory
  {
    // Local (diagonal block)
    Kokkos::View<PetscScalar *> a_local_d;
    Kokkos::View<PetscInt *>    i_local_d;
    Kokkos::View<PetscInt *>    j_local_d;

    // Nonlocal (off-diagonal block)
    Kokkos::View<PetscScalar *> a_nonlocal_d;
    Kokkos::View<PetscInt *>    i_nonlocal_d;
    Kokkos::View<PetscInt *>    j_nonlocal_d;

    // Create device memory for the diagonal block (di[5], i.e. di[m], is its number of nonzeros)
    PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
    PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
    PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));

    // Create device memory for the off-diagonal block
    PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
    PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
    PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));

    // ~~~~~~~~~~~~~~~~~~~~~
    // We could fill the AIJ arrays directly on the device (see the sketch just below);
    // for this test we simply copy in the existing host values
    // ~~~~~~~~~~~~~~~~~~~~~
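    // Purely to illustrate filling on the device: write placeholder values into a_local_d
    // with a parallel_for (these are overwritten by the deep copies below, so the test is
    // unaffected)
    Kokkos::parallel_for(
      "FillLocalValuesExample", a_local_d.extent(0), KOKKOS_LAMBDA(const PetscInt k) { a_local_d(k) = 1.0; });
    PetscCallCXX(Kokkos::fence());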
    Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_local_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_local_h;
    Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_nonlocal_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_nonlocal_h;

    PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
    PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
    PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
    PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
    PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
    PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));

    // We have not specified an execution space instance, so these deep copies are
    // synchronous and there is no need to fence afterwards (an asynchronous variant
    // is sketched after the copies)
    PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
    PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
    PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
    PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
    PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
    PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));
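    // A sketch of the asynchronous alternative (assuming the default execution space;
    // not used in this test):
    //   Kokkos::DefaultExecutionSpace exec;
    //   Kokkos::deep_copy(exec, a_local_d, a_local_h); // likewise for the other views
    //   exec.fence(); // must complete before the device data is used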

    // The garray passed in has to be on the host, but it could be created on the
    // device and copied to the host; here we just copy the existing host values
    PetscCall(PetscMalloc1(n_ab, &garray_h));
    for (PetscInt k = 0; k < n_ab; k++) garray_h[k] = garray[k];
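    // A sketch of the device-built alternative (hypothetical garray_d; not used here):
    //   Kokkos::View<PetscInt *> garray_d("garray_d", n_ab);
    //   /* ... fill garray_d on the device ... */
    //   auto garray_mirror = Kokkos::create_mirror_view_and_copy(HostMirrorMemorySpace(), garray_d);
    //   for (PetscInt k = 0; k < n_ab; k++) garray_h[k] = garray_mirror(k);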

    // ~~~~~~~~~~~~~~~~~~~~~

    // ~~~~~~~~~~~~~~~~~
    // Test MatCreateSeqAIJKokkosWithKokkosViews
    // ~~~~~~~~~~~~~~~~~

    // We can create our local (diagonal block) matrix directly on the device from the
    // CSR row offsets (i), column indices (j) and values (a)
    PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));

    // We can create our nonlocal (off-diagonal block) matrix directly on the device,
    // with local column indices and the compactified number of columns n_ab
    PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));

    // Build our MPI matrix
    // Because we provide garray, and output_mat_nonlocal already uses local column
    // indices with the compactified size, almost nothing happens on the host
    PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));
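    // Note that output_mat_local, output_mat_nonlocal and garray_h are not destroyed or
    // freed below; the assumption is that the new MPI matrix takes ownership of them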

    PetscCall(MatEqual(A, B, &equal));
    PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
    PetscCall(MatSeqAIJRestoreArray(AA, &da));
    PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
    PetscCall(MatSeqAIJRestoreArray(AB, &oa));
    PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
  }

  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &BT));

  /* Free spaces */
  PetscCall(PetscRandomDestroy(&rctx));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&BT));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
  build:
    requires: kokkos_kernels

  test:
    nsize: 2
    args: -mat_type aijkokkos
    requires: kokkos_kernels
    output_file: output/empty.out

TEST*/