Actual source code: ex3k.kokkos.cxx
1: static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
3: #include <petscdmda_kokkos.hpp>
4: #include <petscsnes.h>
6: /*
7: User-defined application context
8: */
9: typedef struct {
10: DM da; /* distributed array */
11: Vec F; /* right-hand side of PDE */
12: PetscReal h; /* mesh spacing */
13: } ApplicationCtx;
15: /* ------------------------------------------------------------------- */
16: /*
17: FormInitialGuess - Computes initial guess.
19: Input/Output Parameter:
20: . x - the solution vector
21: */
22: PetscErrorCode FormInitialGuess(Vec x)
23: {
24: PetscScalar pfive = .50;
26: PetscFunctionBeginUser;
27: PetscCall(VecSet(x, pfive));
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: /* ------------------------------------------------------------------- */
32: /*
33: CpuFunction - Evaluates nonlinear function, F(x) on CPU
35: Input Parameters:
36: . snes - the SNES context
37: . x - input vector
38: . ctx - optional user-defined context, as set by SNESSetFunction()
40: Output Parameter:
41: . r - function vector
43: Note:
44: The user-defined context can contain any application-specific
45: data needed for the function evaluation.
46: */
47: PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
48: {
49: ApplicationCtx *user = (ApplicationCtx *)ctx;
50: DM da = user->da;
51: PetscScalar *X, *R, *F, d;
52: PetscInt i, M, xs, xm;
53: Vec xl;
55: PetscFunctionBeginUser;
56: PetscCall(DMGetLocalVector(da, &xl));
57: PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
58: PetscCall(DMDAVecGetArray(da, xl, &X));
59: PetscCall(DMDAVecGetArray(da, r, &R));
60: PetscCall(DMDAVecGetArray(da, user->F, &F));
62: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
63: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
65: if (xs == 0) { /* left boundary */
66: R[0] = X[0];
67: xs++;
68: xm--;
69: }
70: if (xs + xm == M) { /* right boundary */
71: R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
72: xm--;
73: }
74: d = 1.0 / (user->h * user->h);
75: for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
77: PetscCall(DMDAVecRestoreArray(da, xl, &X));
78: PetscCall(DMDAVecRestoreArray(da, r, &R));
79: PetscCall(DMDAVecRestoreArray(da, user->F, &F));
80: PetscCall(DMRestoreLocalVector(da, &xl));
81: PetscFunctionReturn(PETSC_SUCCESS);
82: }
84: using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace;
85: using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
86: using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
87: using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
89: PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
90: {
91: ApplicationCtx *user = (ApplicationCtx *)ctx;
92: DM da = user->da;
93: PetscScalar d;
94: PetscInt M;
95: Vec xl;
96: PetscScalarKokkosOffsetView R;
97: ConstPetscScalarKokkosOffsetView X, F;
99: PetscFunctionBeginUser;
100: PetscCall(DMGetLocalVector(da, &xl));
101: PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
102: d = 1.0 / (user->h * user->h);
103: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
104: PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */
105: PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */
106: PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
107: Kokkos::parallel_for(
108: Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109: if (i == 0) R(0) = X(0); /* left boundary */
110: else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */
111: else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112: });
113: PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
114: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
115: PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
116: PetscCall(DMRestoreLocalVector(da, &xl));
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121: {
122: ApplicationCtx *user = (ApplicationCtx *)ctx;
123: DM da = user->da;
124: Vec rk;
125: PetscReal norm = 0;
127: PetscFunctionBeginUser;
128: PetscCall(DMGetGlobalVector(da, &rk));
129: PetscCall(CpuFunction(snes, x, r, ctx));
130: PetscCall(KokkosFunction(snes, x, rk, ctx));
131: PetscCall(VecAXPY(rk, -1.0, r));
132: PetscCall(VecNorm(rk, NORM_2, &norm));
133: PetscCall(DMRestoreGlobalVector(da, &rk));
134: PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
137: /* ------------------------------------------------------------------- */
138: /*
139: FormJacobian - Evaluates Jacobian matrix.
141: Input Parameters:
142: . snes - the SNES context
143: . x - input vector
144: . dummy - optional user-defined context (not used here)
146: Output Parameters:
147: . jac - Jacobian matrix
148: . B - optionally different preconditioning matrix
149: . flag - flag indicating matrix structure
150: */
151: PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152: {
153: ApplicationCtx *user = (ApplicationCtx *)ctx;
154: PetscScalar *xx, d, A[3];
155: PetscInt i, j[3], M, xs, xm;
156: DM da = user->da;
158: PetscFunctionBeginUser;
159: /*
160: Get pointer to vector data
161: */
162: PetscCall(DMDAVecGetArrayRead(da, x, &xx));
163: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
165: /*
166: Get range of locally owned matrix
167: */
168: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
170: /*
171: Determine starting and ending local indices for interior grid points.
172: Set Jacobian entries for boundary points.
173: */
175: if (xs == 0) { /* left boundary */
176: i = 0;
177: A[0] = 1.0;
179: PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
180: xs++;
181: xm--;
182: }
183: if (xs + xm == M) { /* right boundary */
184: i = M - 1;
185: A[0] = 1.0;
186: PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
187: xm--;
188: }
190: /*
191: Interior grid points
192: - Note that in this case we set all elements for a particular
193: row at once.
194: */
195: d = 1.0 / (user->h * user->h);
196: for (i = xs; i < xs + xm; i++) {
197: j[0] = i - 1;
198: j[1] = i;
199: j[2] = i + 1;
200: A[0] = A[2] = d;
201: A[1] = -2.0 * d + 2.0 * xx[i];
202: PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
203: }
205: /*
206: Assemble matrix, using the 2-step process:
207: MatAssemblyBegin(), MatAssemblyEnd().
208: By placing code between these two statements, computations can be
209: done while messages are in transition.
211: Also, restore vector.
212: */
214: PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
215: PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
216: PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: int main(int argc, char **argv)
221: {
222: SNES snes; /* SNES context */
223: Mat J; /* Jacobian matrix */
224: ApplicationCtx ctx; /* user-defined context */
225: Vec x, r, U, F; /* vectors */
226: PetscScalar none = -1.0;
227: PetscInt its, N = 5, maxit, maxf;
228: PetscReal abstol, rtol, stol, norm;
229: PetscBool viewinitial = PETSC_FALSE;
231: PetscFunctionBeginUser;
232: PetscCall(PetscInitialize(&argc, &argv, nullptr, help));
233: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
234: ctx.h = 1.0 / (N - 1);
235: PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
237: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
238: Create nonlinear solver context
239: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
240: PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
242: /*
243: Create distributed array (DMDA) to manage parallel grid and vectors
244: */
245: PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
246: PetscCall(DMSetFromOptions(ctx.da));
247: PetscCall(DMSetUp(ctx.da));
249: /*
250: Extract global and local vectors from DMDA; then duplicate for remaining
251: vectors that are the same types
252: */
253: PetscCall(DMCreateGlobalVector(ctx.da, &x));
254: PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
255: PetscCall(VecDuplicate(x, &r));
256: PetscCall(VecDuplicate(x, &F));
257: ctx.F = F;
258: PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
259: PetscCall(VecDuplicate(x, &U));
260: PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
262: /*
263: Set function evaluation routine and vector. Whenever the nonlinear
264: solver needs to compute the nonlinear function, it will call this
265: routine.
266: - Note that the final routine argument is the user-defined
267: context that provides application-specific data for the
268: function evaluation routine.
270: At the beginning, one can use a stub function that checks the Kokkos version
271: against the CPU version to quickly expose errors.
272: PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
273: */
274: PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
276: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
277: Create matrix data structure; set Jacobian evaluation routine
278: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
279: PetscCall(DMCreateMatrix(ctx.da, &J));
281: /*
282: Set Jacobian matrix data structure and default Jacobian evaluation
283: routine. Whenever the nonlinear solver needs to compute the
284: Jacobian matrix, it will call this routine.
285: - Note that the final routine argument is the user-defined
286: context that provides application-specific data for the
287: Jacobian evaluation routine.
288: */
289: PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
290: PetscCall(SNESSetFromOptions(snes));
291: PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
292: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
294: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
295: Initialize application:
296: Store forcing function of PDE and exact solution
297: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
298: {
299: PetscScalarKokkosOffsetView FF, UU;
300: PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
301: PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
302: Kokkos::parallel_for(
303: Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
304: PetscReal xp = i * ctx.h;
305: FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
306: UU(i) = xp * xp * xp;
307: });
308: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
309: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
310: }
312: if (viewinitial) {
313: PetscCall(VecView(U, NULL));
314: PetscCall(VecView(F, NULL));
315: }
317: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
318: Evaluate initial guess; then solve nonlinear system
319: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
321: /*
322: Note: The user should initialize the vector, x, with the initial guess
323: for the nonlinear solver prior to calling SNESSolve(). In particular,
324: to employ an initial guess of zero, the user should explicitly set
325: this vector to zero by calling VecSet().
326: */
327: PetscCall(FormInitialGuess(x));
328: PetscCall(SNESSolve(snes, NULL, x));
329: PetscCall(SNESGetIterationNumber(snes, &its));
330: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
332: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
333: Check solution and clean up
334: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
335: /*
336: Check the error
337: */
338: PetscCall(VecAXPY(x, none, U));
339: PetscCall(VecNorm(x, NORM_2, &norm));
340: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
342: /*
343: Free work space. All PETSc objects should be destroyed when they
344: are no longer needed.
345: */
346: PetscCall(VecDestroy(&x));
347: PetscCall(VecDestroy(&r));
348: PetscCall(VecDestroy(&U));
349: PetscCall(VecDestroy(&F));
350: PetscCall(MatDestroy(&J));
351: PetscCall(SNESDestroy(&snes));
352: PetscCall(DMDestroy(&ctx.da));
353: PetscCall(PetscFinalize());
354: return 0;
355: }
357: /*TEST
359: build:
360: requires: kokkos_kernels
362: test:
363: requires: kokkos_kernels !complex !single
364: nsize: 2
365: args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
366: output_file: output/ex3k_1.out
368: TEST*/