#include <petscdevice_cuda.h>
#include "ex18.h"

/*
  FillValues - device kernel that writes one 3x3 block of values per index.

  Input Parameters:
+ n - number of 3x3 blocks to fill
- v - device array with room for at least 9*n scalars

  Notes:
  Expects a 1D launch; the thread with global index i < n writes v[9*i .. 9*i+8],
  storing entry (vi, vj) at offset vi*3 + vj with the value vi + 2*vj.
  Threads with i >= n do nothing, so the grid may be larger than n.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Widen before multiplying so the global index cannot wrap in 32-bit arithmetic */
  const size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

  if (n <= 0 || i >= (size_t)n) return;

  /* Offset computed in size_t: 9*i may not fit in a 32-bit PetscInt */
  PetscScalar *s = v + 9 * i;
  for (PetscInt vi = 0; vi < 3; vi++) {
    for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
  }
}

/*
  FillMatrixCUDACOO - fills the matrix A from values generated on the GPU.

  Input Parameters:
+ fe - finite element data; only fe->Ne is read here (presumably the local number of elements - confirm in ex18.h)
- A  - matrix that receives the values through MatSetValuesCOO() with INSERT_VALUES

  Notes:
  A temporary device buffer of 9*fe->Ne scalars is allocated, filled by FillValues(), handed to
  MatSetValuesCOO(), and then freed. The COO pattern of A is assumed to have been set by the caller
  with 9 entries per element - TODO confirm against the caller.

  NOTE(review): the kernel is launched on the default stream; presumably MatSetValuesCOO() orders its
  reads of v after that launch - confirm for the matrix type in use.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
{
  PetscScalar *v       = NULL;
  const int    threads = 256;

  PetscFunctionBeginUser;
  /* sizeof first so the whole product is evaluated in size_t rather than PetscInt */
  PetscCallCUDA(cudaMalloc((void **)&v, sizeof(PetscScalar) * 9 * (size_t)fe->Ne));
  if (fe->Ne > 0) {
    /* A zero-sized grid is an invalid launch configuration, so only launch when there is work */
    const PetscInt blocks = (fe->Ne + threads - 1) / threads;

    FillValues<<<(unsigned int)blocks, threads>>>(fe->Ne, v);
    /* Kernel launches do not return a status; pick up launch errors here */
    PetscCallCUDA(cudaGetLastError());
  }
  /* Still called when Ne == 0 so every rank takes part in the matrix update */
  PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES));
  PetscCallCUDA(cudaFree(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}