Actual source code: ex47cu.cu
1: static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2: /*
3: Same as ex47.c except it also uses the GPU to evaluate the function
4: */
6: #include <petscdm.h>
7: #include <petscdmda.h>
8: #include <petscsnes.h>
10: #include <thrust/device_ptr.h>
11: #include <thrust/for_each.h>
12: #include <thrust/tuple.h>
13: #include <thrust/iterator/constant_iterator.h>
14: #include <thrust/iterator/counting_iterator.h>
15: #include <thrust/iterator/zip_iterator.h>
17: extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
18: PetscBool useCUDA = PETSC_FALSE;
20: int main(int argc, char **argv)
21: {
22: SNES snes;
23: Vec x, f;
24: Mat J;
25: DM da;
26: char *tmp, typeName[256];
27: PetscBool flg;
29: PetscFunctionBeginUser;
30: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
31: PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
32: if (flg) {
33: PetscCall(PetscStrstr(typeName, "cuda", &tmp));
34: if (tmp) useCUDA = PETSC_TRUE;
35: }
37: PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
38: PetscCall(DMSetFromOptions(da));
39: PetscCall(DMSetUp(da));
40: PetscCall(DMCreateGlobalVector(da, &x));
41: PetscCall(VecDuplicate(x, &f));
42: PetscCall(DMCreateMatrix(da, &J));
44: PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
45: PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
46: PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
47: PetscCall(SNESSetFromOptions(snes));
48: PetscCall(SNESSolve(snes, NULL, x));
50: PetscCall(MatDestroy(&J));
51: PetscCall(VecDestroy(&x));
52: PetscCall(VecDestroy(&f));
53: PetscCall(SNESDestroy(&snes));
54: PetscCall(DMDestroy(&da));
56: PetscCall(PetscFinalize());
57: return 0;
58: }
60: struct ApplyStencil {
61: template <typename Tuple>
62: __host__ __device__ void operator()(Tuple t)
63: {
64: /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65: thrust::get<0>(t) = 1;
66: if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
67: thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
68: } else if (thrust::get<4>(t) == 0) {
69: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70: } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
71: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72: }
73: }
74: };
76: PetscErrorCode ComputeFunction(SNES, Vec x, Vec f, void *ctx)
77: {
78: PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
79: PetscScalar *xx, *ff, hx;
80: DM da = (DM)ctx;
81: Vec xlocal;
82: PetscMPIInt rank, size;
83: MPI_Comm comm;
84: PetscScalar const *xarray;
85: PetscScalar *farray;
87: PetscFunctionBeginUser;
88: PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
89: hx = 1.0 / (PetscReal)(Mx - 1);
90: PetscCall(DMGetLocalVector(da, &xlocal));
91: PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
92: PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
94: if (useCUDA) {
95: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
96: PetscCallMPI(MPI_Comm_size(comm, &size));
97: PetscCallMPI(MPI_Comm_rank(comm, &rank));
98: PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
99: PetscCall(VecCUDAGetArrayWrite(f, &farray));
100: if (rank) xstartshift = 1;
101: else xstartshift = 0;
102: if (rank != size - 1) xendshift = 1;
103: else xendshift = 0;
104: PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
105: PetscCall(VecGetLocalSize(x, &lsize));
106: // clang-format off
107: try {
108: thrust::for_each(
109: thrust::make_zip_iterator(
110: thrust::make_tuple(
111: thrust::device_ptr<PetscScalar>(farray),
112: thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
113: thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
114: thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
115: thrust::counting_iterator<int>(fstart),
116: thrust::constant_iterator<int>(Mx),
117: thrust::constant_iterator<PetscScalar>(hx))),
118: thrust::make_zip_iterator(
119: thrust::make_tuple(
120: thrust::device_ptr<PetscScalar>(farray + lsize),
121: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
122: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
123: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
124: thrust::counting_iterator<int>(fstart) + lsize,
125: thrust::constant_iterator<int>(Mx),
126: thrust::constant_iterator<PetscScalar>(hx))),
127: ApplyStencil());
128: }
129: // clang-format on
130: catch (char *all) {
131: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
132: }
133: PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
134: PetscCall(VecCUDARestoreArrayWrite(f, &farray));
135: } else {
136: PetscCall(DMDAVecGetArray(da, xlocal, &xx));
137: PetscCall(DMDAVecGetArray(da, f, &ff));
138: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
140: for (i = xs; i < xs + xm; i++) {
141: if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
142: else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
143: }
144: PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
145: PetscCall(DMDAVecRestoreArray(da, f, &ff));
146: }
147: PetscCall(DMRestoreLocalVector(da, &xlocal));
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
150: PetscErrorCode ComputeJacobian(SNES, Vec x, Mat J, Mat, void *ctx)
151: {
152: DM da = (DM)ctx;
153: PetscInt i, Mx, xm, xs;
154: PetscScalar hx, *xx;
155: Vec xlocal;
157: PetscFunctionBeginUser;
158: PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
159: hx = 1.0 / (PetscReal)(Mx - 1);
160: PetscCall(DMGetLocalVector(da, &xlocal));
161: PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
162: PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
163: PetscCall(DMDAVecGetArray(da, xlocal, &xx));
164: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
166: for (i = xs; i < xs + xm; i++) {
167: if (i == 0 || i == Mx - 1) {
168: PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
169: } else {
170: PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
171: PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
172: PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
173: }
174: }
175: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
176: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
177: PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
178: PetscCall(DMRestoreLocalVector(da, &xlocal));
179: PetscFunctionReturn(PETSC_SUCCESS);
180: }
182: /*TEST
184: build:
185: requires: cuda
187: testset:
188: args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
189: output_file: output/ex47cu_1.out
190: test:
191: suffix: 1
192: nsize: 1
193: test:
194: suffix: 2
195: nsize: 2
197: TEST*/