Actual source code: ex3k.kokkos.cxx

  1: static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";

  3: #include <petscdmda_kokkos.hpp>
  4: #include <petscsnes.h>

  6: /*
  7:    User-defined application context
  8: */
  9: typedef struct {
 10:   DM        da; /* distributed array */
 11:   Vec       F;  /* right-hand side of PDE */
 12:   PetscReal h;  /* mesh spacing */
 13: } ApplicationCtx;

 15: /* ------------------------------------------------------------------- */
 16: /*
 17:    FormInitialGuess - Computes initial guess.

 19:    Input/Output Parameter:
 20: .  x - the solution vector
 21: */
 22: PetscErrorCode FormInitialGuess(Vec x)
 23: {
 24:   PetscScalar pfive = .50;

 26:   PetscFunctionBeginUser;
 27:   PetscCall(VecSet(x, pfive));
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }

 31: /* ------------------------------------------------------------------- */
 32: /*
 33:    CpuFunction - Evaluates nonlinear function, F(x) on CPU

 35:    Input Parameters:
 36: .  snes - the SNES context
 37: .  x - input vector
 38: .  ctx - optional user-defined context, as set by SNESSetFunction()

 40:    Output Parameter:
 41: .  r - function vector

 43:    Note:
 44:    The user-defined context can contain any application-specific
 45:    data needed for the function evaluation.
 46: */
 47: PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
 48: {
 49:   ApplicationCtx *user = (ApplicationCtx *)ctx;
 50:   DM              da   = user->da;
 51:   PetscScalar    *X, *R, *F, d;
 52:   PetscInt        i, M, xs, xm;
 53:   Vec             xl;

 55:   PetscFunctionBeginUser;
 56:   PetscCall(DMGetLocalVector(da, &xl));
 57:   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
 58:   PetscCall(DMDAVecGetArray(da, xl, &X));
 59:   PetscCall(DMDAVecGetArray(da, r, &R));
 60:   PetscCall(DMDAVecGetArray(da, user->F, &F));

 62:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
 63:   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));

 65:   if (xs == 0) { /* left boundary */
 66:     R[0] = X[0];
 67:     xs++;
 68:     xm--;
 69:   }
 70:   if (xs + xm == M) { /* right boundary */
 71:     R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
 72:     xm--;
 73:   }
 74:   d = 1.0 / (user->h * user->h);
 75:   for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];

 77:   PetscCall(DMDAVecRestoreArray(da, xl, &X));
 78:   PetscCall(DMDAVecRestoreArray(da, r, &R));
 79:   PetscCall(DMDAVecRestoreArray(da, user->F, &F));
 80:   PetscCall(DMRestoreLocalVector(da, &xl));
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }

 84: using DefaultExecutionSpace            = Kokkos::DefaultExecutionSpace;
 85: using DefaultMemorySpace               = Kokkos::DefaultExecutionSpace::memory_space;
 86: using PetscScalarKokkosOffsetView      = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
 87: using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;

 89: PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
 90: {
 91:   ApplicationCtx                  *user = (ApplicationCtx *)ctx;
 92:   DM                               da   = user->da;
 93:   PetscScalar                      d;
 94:   PetscInt                         M;
 95:   Vec                              xl;
 96:   PetscScalarKokkosOffsetView      R;
 97:   ConstPetscScalarKokkosOffsetView X, F;

 99:   PetscFunctionBeginUser;
100:   PetscCall(DMGetLocalVector(da, &xl));
101:   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
102:   d = 1.0 / (user->h * user->h);
103:   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
104:   PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X));      /* read only */
105:   PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R));  /* write only */
106:   PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
107:   Kokkos::parallel_for(
108:     Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109:       if (i == 0) R(0) = X(0);                                                 /* left boundary */
110:       else if (i == M - 1) R(i) = X(i) - 1.0;                                  /* right boundary */
111:       else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112:     });
113:   PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
114:   PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
115:   PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
116:   PetscCall(DMRestoreLocalVector(da, &xl));
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }

120: PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121: {
122:   ApplicationCtx *user = (ApplicationCtx *)ctx;
123:   DM              da   = user->da;
124:   Vec             rk;
125:   PetscReal       norm = 0;

127:   PetscFunctionBeginUser;
128:   PetscCall(DMGetGlobalVector(da, &rk));
129:   PetscCall(CpuFunction(snes, x, r, ctx));
130:   PetscCall(KokkosFunction(snes, x, rk, ctx));
131:   PetscCall(VecAXPY(rk, -1.0, r));
132:   PetscCall(VecNorm(rk, NORM_2, &norm));
133:   PetscCall(DMRestoreGlobalVector(da, &rk));
134:   PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
135:   PetscFunctionReturn(PETSC_SUCCESS);
136: }
137: /* ------------------------------------------------------------------- */
138: /*
139:    FormJacobian - Evaluates Jacobian matrix.

141:    Input Parameters:
142: .  snes - the SNES context
143: .  x - input vector
144: .  dummy - optional user-defined context (not used here)

146:    Output Parameters:
147: .  jac - Jacobian matrix
148: .  B - optionally different preconditioning matrix
149: .  flag - flag indicating matrix structure
150: */
151: PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152: {
153:   ApplicationCtx *user = (ApplicationCtx *)ctx;
154:   PetscScalar    *xx, d, A[3];
155:   PetscInt        i, j[3], M, xs, xm;
156:   DM              da = user->da;

158:   PetscFunctionBeginUser;
159:   /*
160:      Get pointer to vector data
161:   */
162:   PetscCall(DMDAVecGetArrayRead(da, x, &xx));
163:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));

165:   /*
166:     Get range of locally owned matrix
167:   */
168:   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));

170:   /*
171:      Determine starting and ending local indices for interior grid points.
172:      Set Jacobian entries for boundary points.
173:   */

175:   if (xs == 0) { /* left boundary */
176:     i    = 0;
177:     A[0] = 1.0;

179:     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
180:     xs++;
181:     xm--;
182:   }
183:   if (xs + xm == M) { /* right boundary */
184:     i    = M - 1;
185:     A[0] = 1.0;
186:     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
187:     xm--;
188:   }

190:   /*
191:      Interior grid points
192:       - Note that in this case we set all elements for a particular
193:         row at once.
194:   */
195:   d = 1.0 / (user->h * user->h);
196:   for (i = xs; i < xs + xm; i++) {
197:     j[0] = i - 1;
198:     j[1] = i;
199:     j[2] = i + 1;
200:     A[0] = A[2] = d;
201:     A[1]        = -2.0 * d + 2.0 * xx[i];
202:     PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
203:   }

205:   /*
206:      Assemble matrix, using the 2-step process:
207:        MatAssemblyBegin(), MatAssemblyEnd().
208:      By placing code between these two statements, computations can be
209:      done while messages are in transition.

211:      Also, restore vector.
212:   */

214:   PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
215:   PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
216:   PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }

220: int main(int argc, char **argv)
221: {
222:   SNES           snes;       /* SNES context */
223:   Mat            J;          /* Jacobian matrix */
224:   ApplicationCtx ctx;        /* user-defined context */
225:   Vec            x, r, U, F; /* vectors */
226:   PetscScalar    none = -1.0;
227:   PetscInt       its, N = 5, maxit, maxf;
228:   PetscReal      abstol, rtol, stol, norm;
229:   PetscBool      viewinitial = PETSC_FALSE;

231:   PetscFunctionBeginUser;
232:   PetscCall(PetscInitialize(&argc, &argv, nullptr, help));
233:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
234:   ctx.h = 1.0 / (N - 1);
235:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));

237:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
238:      Create nonlinear solver context
239:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
240:   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));

242:   /*
243:      Create distributed array (DMDA) to manage parallel grid and vectors
244:   */
245:   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
246:   PetscCall(DMSetFromOptions(ctx.da));
247:   PetscCall(DMSetUp(ctx.da));

249:   /*
250:      Extract global and local vectors from DMDA; then duplicate for remaining
251:      vectors that are the same types
252:   */
253:   PetscCall(DMCreateGlobalVector(ctx.da, &x));
254:   PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
255:   PetscCall(VecDuplicate(x, &r));
256:   PetscCall(VecDuplicate(x, &F));
257:   ctx.F = F;
258:   PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
259:   PetscCall(VecDuplicate(x, &U));
260:   PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));

262:   /*
263:      Set function evaluation routine and vector.  Whenever the nonlinear
264:      solver needs to compute the nonlinear function, it will call this
265:      routine.
266:       - Note that the final routine argument is the user-defined
267:         context that provides application-specific data for the
268:         function evaluation routine.

270:      At the beginning, one can use a stub function that checks the Kokkos version
271:      against the CPU version to quickly expose errors.
272:      PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
273:   */
274:   PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));

276:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
277:      Create matrix data structure; set Jacobian evaluation routine
278:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
279:   PetscCall(DMCreateMatrix(ctx.da, &J));

281:   /*
282:      Set Jacobian matrix data structure and default Jacobian evaluation
283:      routine.  Whenever the nonlinear solver needs to compute the
284:      Jacobian matrix, it will call this routine.
285:       - Note that the final routine argument is the user-defined
286:         context that provides application-specific data for the
287:         Jacobian evaluation routine.
288:   */
289:   PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
290:   PetscCall(SNESSetFromOptions(snes));
291:   PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
292:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));

294:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
295:      Initialize application:
296:      Store forcing function of PDE and exact solution
297:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
298:   {
299:     PetscScalarKokkosOffsetView FF, UU;
300:     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
301:     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
302:     Kokkos::parallel_for(
303:       Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
304:         PetscReal xp = i * ctx.h;
305:         FF(i)        = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
306:         UU(i)        = xp * xp * xp;
307:       });
308:     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
309:     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
310:   }

312:   if (viewinitial) {
313:     PetscCall(VecView(U, NULL));
314:     PetscCall(VecView(F, NULL));
315:   }

317:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
318:      Evaluate initial guess; then solve nonlinear system
319:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

321:   /*
322:      Note: The user should initialize the vector, x, with the initial guess
323:      for the nonlinear solver prior to calling SNESSolve().  In particular,
324:      to employ an initial guess of zero, the user should explicitly set
325:      this vector to zero by calling VecSet().
326:   */
327:   PetscCall(FormInitialGuess(x));
328:   PetscCall(SNESSolve(snes, NULL, x));
329:   PetscCall(SNESGetIterationNumber(snes, &its));
330:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));

332:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
333:      Check solution and clean up
334:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
335:   /*
336:      Check the error
337:   */
338:   PetscCall(VecAXPY(x, none, U));
339:   PetscCall(VecNorm(x, NORM_2, &norm));
340:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));

342:   /*
343:      Free work space.  All PETSc objects should be destroyed when they
344:      are no longer needed.
345:   */
346:   PetscCall(VecDestroy(&x));
347:   PetscCall(VecDestroy(&r));
348:   PetscCall(VecDestroy(&U));
349:   PetscCall(VecDestroy(&F));
350:   PetscCall(MatDestroy(&J));
351:   PetscCall(SNESDestroy(&snes));
352:   PetscCall(DMDestroy(&ctx.da));
353:   PetscCall(PetscFinalize());
354:   return 0;
355: }

357: /*TEST

359:    build:
360:      requires: kokkos_kernels

362:    test:
363:      requires: kokkos_kernels !complex !single
364:      nsize: 2
365:      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
366:      output_file: output/ex3k_1.out

368: TEST*/