Actual source code: ex18cu.cu
1: #include <petscdevice_cuda.h>
2: #include "ex18.h"
/*
  FillValues - writes a fixed 3x3 pattern into each of n consecutive 9-entry chunks of v.

  Input Parameters:
+ n - number of 9-entry chunks to fill
- v - array holding at least 9*n entries; it is dereferenced from device code

  Notes:
  Uses 1D launch indexing (blockIdx.x, blockDim.x, threadIdx.x): thread i fills chunk i and
  threads with i >= n do nothing, so the launch must supply at least n threads in total.
  Within a chunk, entry (vi, vj) is stored at offset vi*3 + vj and is set to vi + 2*vj.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Form the global thread index in 64-bit so blockIdx.x * blockDim.x cannot wrap in 32-bit unsigned */
  const long long i = (long long)blockIdx.x * (long long)blockDim.x + (long long)threadIdx.x;

  if (i < (long long)n) {
    /* 9*i can leave the range of a 32-bit PetscInt long before i does, so build the offset in size_t */
    PetscScalar *s = v + (size_t)9 * (size_t)i;

    for (PetscInt vi = 0; vi < 3; vi++) {
      for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
    }
  }
}
/*
  FillMatrixCUDACOO - fills a temporary device buffer with 9 values per entry counted by
  fe->Ne and passes it to MatSetValuesCOO() on A with INSERT_VALUES.

  Input Parameters:
+ fe - structure whose Ne field gives the number of 9-entry chunks to generate
- A  - matrix receiving the values through MatSetValuesCOO()

  Notes:
  The buffer is obtained with cudaMalloc() and released with cudaFree() before returning.
  NOTE(review): A presumably needs its COO pattern set up beforehand with 9*fe->Ne entries;
  that setup is not visible in this file - confirm against the caller.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
{
  const PetscInt ne = fe->Ne;
  /* Widen before multiplying: 9*ne evaluated in a 32-bit PetscInt can overflow */
  const size_t nvals = (size_t)9 * (size_t)ne;
  PetscScalar *v     = NULL;

  PetscFunctionBeginUser;
  PetscCallCUDA(cudaMalloc((void **)&v, nvals * sizeof(PetscScalar)));
  /* A grid with zero blocks is an invalid launch configuration, so skip the launch when there is nothing to fill */
  if (ne > 0) {
    FillValues<<<(ne + 255) / 256, 256>>>(ne, v);
    /* Kernel launches return no status; query the runtime so a failed launch is reported here */
    PetscCallCUDA(cudaGetLastError());
  }
  PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES));
  PetscCallCUDA(cudaFree(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}