Actual source code: ex47cu.cu

  1: static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
  2: /*
  3:    Same as ex47.c except it also uses the GPU to evaluate the function
  4: */

  6: #include <petscdm.h>
  7: #include <petscdmda.h>
  8: #include <petscsnes.h>

 10: #include <thrust/device_ptr.h>
 11: #include <thrust/for_each.h>
 12: #include <thrust/tuple.h>
 13: #include <thrust/iterator/constant_iterator.h>
 14: #include <thrust/iterator/counting_iterator.h>
 15: #include <thrust/iterator/zip_iterator.h>

 17: extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
 18: PetscBool             useCUDA = PETSC_FALSE;

 20: int main(int argc, char **argv)
 21: {
 22:   SNES      snes;
 23:   Vec       x, f;
 24:   Mat       J;
 25:   DM        da;
 26:   char     *tmp, typeName[256];
 27:   PetscBool flg;

 29:   PetscFunctionBeginUser;
 30:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 31:   PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
 32:   if (flg) {
 33:     PetscCall(PetscStrstr(typeName, "cuda", &tmp));
 34:     if (tmp) useCUDA = PETSC_TRUE;
 35:   }

 37:   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
 38:   PetscCall(DMSetFromOptions(da));
 39:   PetscCall(DMSetUp(da));
 40:   PetscCall(DMCreateGlobalVector(da, &x));
 41:   PetscCall(VecDuplicate(x, &f));
 42:   PetscCall(DMCreateMatrix(da, &J));

 44:   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
 45:   PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
 46:   PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
 47:   PetscCall(SNESSetFromOptions(snes));
 48:   PetscCall(SNESSolve(snes, NULL, x));

 50:   PetscCall(MatDestroy(&J));
 51:   PetscCall(VecDestroy(&x));
 52:   PetscCall(VecDestroy(&f));
 53:   PetscCall(SNESDestroy(&snes));
 54:   PetscCall(DMDestroy(&da));

 56:   PetscCall(PetscFinalize());
 57:   return 0;
 58: }

 60: struct ApplyStencil {
 61:   template <typename Tuple>
 62:   __host__ __device__ void operator()(Tuple t)
 63:   {
 64:     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
 65:     thrust::get<0>(t) = 1;
 66:     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
 67:       thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
 68:     } else if (thrust::get<4>(t) == 0) {
 69:       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 70:     } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
 71:       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 72:     }
 73:   }
 74: };

 76: PetscErrorCode ComputeFunction(SNES, Vec x, Vec f, void *ctx)
 77: {
 78:   PetscInt           i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
 79:   PetscScalar       *xx, *ff, hx;
 80:   DM                 da = (DM)ctx;
 81:   Vec                xlocal;
 82:   PetscMPIInt        rank, size;
 83:   MPI_Comm           comm;
 84:   PetscScalar const *xarray;
 85:   PetscScalar       *farray;

 87:   PetscFunctionBeginUser;
 88:   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
 89:   hx = 1.0 / (PetscReal)(Mx - 1);
 90:   PetscCall(DMGetLocalVector(da, &xlocal));
 91:   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
 92:   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));

 94:   if (useCUDA) {
 95:     PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
 96:     PetscCallMPI(MPI_Comm_size(comm, &size));
 97:     PetscCallMPI(MPI_Comm_rank(comm, &rank));
 98:     PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
 99:     PetscCall(VecCUDAGetArrayWrite(f, &farray));
100:     if (rank) xstartshift = 1;
101:     else xstartshift = 0;
102:     if (rank != size - 1) xendshift = 1;
103:     else xendshift = 0;
104:     PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
105:     PetscCall(VecGetLocalSize(x, &lsize));
106:     // clang-format off
107:     try {
108:       thrust::for_each(
109:         thrust::make_zip_iterator(
110:           thrust::make_tuple(
111:             thrust::device_ptr<PetscScalar>(farray),
112:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
113:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
114:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
115:             thrust::counting_iterator<int>(fstart),
116:             thrust::constant_iterator<int>(Mx),
117:             thrust::constant_iterator<PetscScalar>(hx))),
118:         thrust::make_zip_iterator(
119:           thrust::make_tuple(
120:             thrust::device_ptr<PetscScalar>(farray + lsize),
121:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
122:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
123:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
124:             thrust::counting_iterator<int>(fstart) + lsize,
125:             thrust::constant_iterator<int>(Mx),
126:             thrust::constant_iterator<PetscScalar>(hx))),
127:         ApplyStencil());
128:     }
129:     // clang-format on
130:     catch (char *all) {
131:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
132:     }
133:     PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
134:     PetscCall(VecCUDARestoreArrayWrite(f, &farray));
135:   } else {
136:     PetscCall(DMDAVecGetArray(da, xlocal, &xx));
137:     PetscCall(DMDAVecGetArray(da, f, &ff));
138:     PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));

140:     for (i = xs; i < xs + xm; i++) {
141:       if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
142:       else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
143:     }
144:     PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
145:     PetscCall(DMDAVecRestoreArray(da, f, &ff));
146:   }
147:   PetscCall(DMRestoreLocalVector(da, &xlocal));
148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }
150: PetscErrorCode ComputeJacobian(SNES, Vec x, Mat J, Mat, void *ctx)
151: {
152:   DM          da = (DM)ctx;
153:   PetscInt    i, Mx, xm, xs;
154:   PetscScalar hx, *xx;
155:   Vec         xlocal;

157:   PetscFunctionBeginUser;
158:   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
159:   hx = 1.0 / (PetscReal)(Mx - 1);
160:   PetscCall(DMGetLocalVector(da, &xlocal));
161:   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
162:   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
163:   PetscCall(DMDAVecGetArray(da, xlocal, &xx));
164:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));

166:   for (i = xs; i < xs + xm; i++) {
167:     if (i == 0 || i == Mx - 1) {
168:       PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
169:     } else {
170:       PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
171:       PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
172:       PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
173:     }
174:   }
175:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
176:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
177:   PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
178:   PetscCall(DMRestoreLocalVector(da, &xlocal));
179:   PetscFunctionReturn(PETSC_SUCCESS);
180: }

182: /*TEST

184:    build:
185:       requires: cuda

187:    testset:
188:       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
189:       output_file: output/ex47cu_1.out
190:       test:
191:         suffix: 1
192:         nsize: 1
193:       test:
194:         suffix: 2
195:         nsize: 2

197: TEST*/