Actual source code: ex55k.kokkos.cxx

  1: #include <petscconf.h>

  3: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
  4:   #include <Kokkos_Core.hpp>
  5: #include <petsc_kokkos.hpp>
  6: #include <petscdmda_kokkos.hpp>

  8: #include <petscdm.h>
  9: #include <petscdmda.h>
 10: #include <petscsnes.h>
 11:   #include "ex55.h"

 13: using DefaultMemorySpace                 = Kokkos::DefaultExecutionSpace::memory_space;
 14: using ConstPetscScalarKokkosOffsetView2D = Kokkos::Experimental::OffsetView<const PetscScalar **, Kokkos::LayoutRight, DefaultMemorySpace>;
 15: using PetscScalarKokkosOffsetView2D      = Kokkos::Experimental::OffsetView<PetscScalar **, Kokkos::LayoutRight, DefaultMemorySpace>;

 17: using PetscCountKokkosView  = Kokkos::View<PetscCount *, DefaultMemorySpace>;
 18: using PetscIntKokkosView    = Kokkos::View<PetscInt *, DefaultMemorySpace>;
 19: using PetscScalarKokkosView = Kokkos::View<PetscScalar *, DefaultMemorySpace>;
 20: using Kokkos::Iterate;
 21: using Kokkos::MDRangePolicy;
 22: using Kokkos::Rank;

 24: KOKKOS_INLINE_FUNCTION PetscErrorCode MMSSolution1(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
 25: {
 26:   PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
 27:   u[0] = x * (1 - x) * y * (1 - y);
 28:   return PETSC_SUCCESS;
 29: }

 31: KOKKOS_INLINE_FUNCTION PetscErrorCode MMSForcing1(PetscReal user_param, const DMDACoor2d *c, PetscScalar *f)
 32: {
 33:   PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
 34:   f[0] = 2 * x * (1 - x) + 2 * y * (1 - y) - user_param * PetscExpReal(x * (1 - x) * y * (1 - y));
 35:   return PETSC_SUCCESS;
 36: }

 38: PetscErrorCode FormFunctionLocalVec(DMDALocalInfo *info, Vec x, Vec f, AppCtx *user)
 39: {
 40:   PetscReal lambda, hx, hy, hxdhy, hydhx;
 41:   PetscInt  xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my;
 42:   PetscReal user_param = user->param;

 44:   ConstPetscScalarKokkosOffsetView2D xv;
 45:   PetscScalarKokkosOffsetView2D      fv;
 46:   Kokkos::DefaultExecutionSpace      exec = PetscGetKokkosExecutionSpace();

 48:   PetscFunctionBeginUser;
 49:   lambda = user->param;
 50:   hx     = 1.0 / (PetscReal)(info->mx - 1);
 51:   hy     = 1.0 / (PetscReal)(info->my - 1);
 52:   hxdhy  = hx / hy;
 53:   hydhx  = hy / hx;
 54:   /*
 55:      Compute function over the locally owned part of the grid
 56:   */
 57:   PetscCall(DMDAVecGetKokkosOffsetView(info->da, x, &xv));
 58:   PetscCall(DMDAVecGetKokkosOffsetViewWrite(info->da, f, &fv));

 60:   PetscCallCXX(Kokkos::parallel_for(
 61:     "FormFunctionLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>(exec, {ys, xs}, {ys + ym, xs + xm}), KOKKOS_LAMBDA(PetscInt j, PetscInt i) {
 62:       DMDACoor2d  c;
 63:       PetscScalar u, ue, uw, un, us, uxx, uyy, mms_solution, mms_forcing;

 65:       if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) {
 66:         c.x = i * hx;
 67:         c.y = j * hy;
 68:         static_cast<void>(MMSSolution1(user, &c, &mms_solution));
 69:         fv(j, i) = 2.0 * (hydhx + hxdhy) * (xv(j, i) - mms_solution);
 70:       } else {
 71:         u  = xv(j, i);
 72:         uw = xv(j, i - 1);
 73:         ue = xv(j, i + 1);
 74:         un = xv(j - 1, i);
 75:         us = xv(j + 1, i);

 77:         /* Enforce boundary conditions at neighboring points -- setting these values causes the Jacobian to be symmetric. */
 78:         if (i - 1 == 0) {
 79:           c.x = (i - 1) * hx;
 80:           c.y = j * hy;
 81:           static_cast<void>(MMSSolution1(user, &c, &uw));
 82:         }
 83:         if (i + 1 == mx - 1) {
 84:           c.x = (i + 1) * hx;
 85:           c.y = j * hy;
 86:           static_cast<void>(MMSSolution1(user, &c, &ue));
 87:         }
 88:         if (j - 1 == 0) {
 89:           c.x = i * hx;
 90:           c.y = (j - 1) * hy;
 91:           static_cast<void>(MMSSolution1(user, &c, &un));
 92:         }
 93:         if (j + 1 == my - 1) {
 94:           c.x = i * hx;
 95:           c.y = (j + 1) * hy;
 96:           static_cast<void>(MMSSolution1(user, &c, &us));
 97:         }

 99:         uxx         = (2.0 * u - uw - ue) * hydhx;
100:         uyy         = (2.0 * u - un - us) * hxdhy;
101:         mms_forcing = 0;
102:         c.x         = i * hx;
103:         c.y         = j * hy;
104:         static_cast<void>(MMSForcing1(user_param, &c, &mms_forcing));
105:         fv(j, i) = uxx + uyy - hx * hy * (lambda * PetscExpScalar(u) + mms_forcing);
106:       }
107:     }));

109:   PetscCall(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv));
110:   PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(info->da, f, &fv));

112:   PetscCall(PetscLogFlops(11.0 * info->ym * info->xm));
113:   PetscFunctionReturn(PETSC_SUCCESS);
114: }

116: PetscErrorCode FormObjectiveLocalVec(DMDALocalInfo *info, Vec x, PetscReal *obj, AppCtx *user)
117: {
118:   PetscInt  xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my;
119:   PetscReal lambda, hx, hy, hxdhy, hydhx, sc, lobj = 0;
120:   MPI_Comm  comm;

122:   ConstPetscScalarKokkosOffsetView2D xv;
123:   Kokkos::DefaultExecutionSpace      exec = PetscGetKokkosExecutionSpace();

125:   PetscFunctionBeginUser;
126:   *obj = 0;
127:   PetscCall(PetscObjectGetComm((PetscObject)info->da, &comm));
128:   lambda = user->param;
129:   hx     = 1.0 / (PetscReal)(mx - 1);
130:   hy     = 1.0 / (PetscReal)(my - 1);
131:   sc     = hx * hy * lambda;
132:   hxdhy  = hx / hy;
133:   hydhx  = hy / hx;
134:   /*
135:      Compute function over the locally owned part of the grid
136:   */
137:   PetscCall(DMDAVecGetKokkosOffsetView(info->da, x, &xv));

139:   PetscCallCXX(Kokkos::parallel_reduce(
140:     "FormObjectiveLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>(exec, {ys, xs}, {ys + ym, xs + xm}),
141:     KOKKOS_LAMBDA(PetscInt j, PetscInt i, PetscReal &update) {
142:       PetscScalar u, ue, uw, un, us, uxux, uyuy;
143:       if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) {
144:         update += PetscRealPart((hydhx + hxdhy) * xv(j, i) * xv(j, i));
145:       } else {
146:         u  = xv(j, i);
147:         uw = xv(j, i - 1);
148:         ue = xv(j, i + 1);
149:         un = xv(j - 1, i);
150:         us = xv(j + 1, i);

152:         if (i - 1 == 0) uw = 0.;
153:         if (i + 1 == mx - 1) ue = 0.;
154:         if (j - 1 == 0) un = 0.;
155:         if (j + 1 == my - 1) us = 0.;

157:         /* F[u] = 1/2\int_{\omega}\nabla^2u(x)*u(x)*dx */

159:         uxux = u * (2. * u - ue - uw) * hydhx;
160:         uyuy = u * (2. * u - un - us) * hxdhy;

162:         update += PetscRealPart(0.5 * (uxux + uyuy) - sc * PetscExpScalar(u));
163:       }
164:     },
165:     lobj));

167:   PetscCall(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv));
168:   PetscCall(PetscLogFlops(12.0 * info->ym * info->xm));
169:   *obj = lobj;
170:   PetscFunctionReturn(PETSC_SUCCESS);
171: }

173: PetscErrorCode FormJacobianLocalVec(DMDALocalInfo *info, Vec x, Mat jac, Mat jacpre, AppCtx *user)
174: {
175:   PetscInt     i, j;
176:   PetscInt     xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my;
177:   MatStencil   col[5], row;
178:   PetscScalar  lambda, hx, hy, hxdhy, hydhx, sc;
179:   DM           coordDA;
180:   Vec          coordinates;
181:   DMDACoor2d **coords;

183:   PetscFunctionBeginUser;
184:   lambda = user->param;
185:   /* Extract coordinates */
186:   PetscCall(DMGetCoordinateDM(info->da, &coordDA));
187:   PetscCall(DMGetCoordinates(info->da, &coordinates));

189:   PetscCall(DMDAVecGetArray(coordDA, coordinates, &coords));
190:   hx = xm > 1 ? PetscRealPart(coords[ys][xs + 1].x) - PetscRealPart(coords[ys][xs].x) : 1.0;
191:   hy = ym > 1 ? PetscRealPart(coords[ys + 1][xs].y) - PetscRealPart(coords[ys][xs].y) : 1.0;
192:   PetscCall(DMDAVecRestoreArray(coordDA, coordinates, &coords));

194:   hxdhy = hx / hy;
195:   hydhx = hy / hx;
196:   sc    = hx * hy * lambda;

198:   /* ----------------------------------------- */
199:   /*  MatSetPreallocationCOO()                 */
200:   /* ----------------------------------------- */
201:   if (!user->ncoo) {
202:     PetscCount ncoo = ((PetscCount)xm) * ((PetscCount)ym) * 5;
203:     PetscInt  *coo_i, *coo_j, *ip, *jp;
204:     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); /* 5-point stencil such that each row has at most 5 nonzeros */

206:     ip = coo_i;
207:     jp = coo_j;
208:     for (j = ys; j < ys + ym; j++) {
209:       for (i = xs; i < xs + xm; i++) {
210:         row.j = j;
211:         row.i = i;
212:         /* Initialize neighbors with negative indices */
213:         col[0].j = col[1].j = col[3].j = col[4].j = -1;
214:         /* boundary points */
215:         if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) {
216:           col[2].j = row.j;
217:           col[2].i = row.i;
218:         } else {
219:           /* interior grid points */
220:           if (j - 1 != 0) {
221:             col[0].j = j - 1;
222:             col[0].i = i;
223:           }

225:           if (i - 1 != 0) {
226:             col[1].j = j;
227:             col[1].i = i - 1;
228:           }

230:           col[2].j = row.j;
231:           col[2].i = row.i;

233:           if (i + 1 != mx - 1) {
234:             col[3].j = j;
235:             col[3].i = i + 1;
236:           }

238:           if (j + 1 != mx - 1) {
239:             col[4].j = j + 1;
240:             col[4].i = i;
241:           }
242:         }
243:         PetscCall(DMDAMapMatStencilToGlobal(info->da, 5, col, jp));
244:         for (PetscInt k = 0; k < 5; k++) ip[k] = jp[2];
245:         ip += 5;
246:         jp += 5;
247:       }
248:     }
249:     PetscCall(MatSetPreallocationCOO(jacpre, ncoo, coo_i, coo_j));
250:     PetscCall(PetscFree2(coo_i, coo_j));
251:     user->ncoo = ncoo;
252:   }

254:   /* ----------------------------------------- */
255:   /*  MatSetValuesCOO()                        */
256:   /* ----------------------------------------- */
257:   PetscScalarKokkosView              coo_v("coo_v", user->ncoo);
258:   ConstPetscScalarKokkosOffsetView2D xv;
259:   Kokkos::DefaultExecutionSpace      exec = PetscGetKokkosExecutionSpace();

261:   PetscCall(DMDAVecGetKokkosOffsetView(info->da, x, &xv));

263:   PetscCallCXX(Kokkos::parallel_for(
264:     "FormJacobianLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>(exec, {ys, xs}, {ys + ym, xs + xm}), KOKKOS_LAMBDA(PetscCount j, PetscCount i) {
265:       PetscInt p = ((j - ys) * xm + (i - xs)) * 5;
266:       /* boundary points */
267:       if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) {
268:         coo_v(p + 2) = 2.0 * (hydhx + hxdhy);
269:       } else {
270:         /* interior grid points */
271:         if (j - 1 != 0) coo_v(p + 0) = -hxdhy;
272:         if (i - 1 != 0) coo_v(p + 1) = -hydhx;

274:         coo_v(p + 2) = 2.0 * (hydhx + hxdhy) - sc * PetscExpScalar(xv(j, i));

276:         if (i + 1 != mx - 1) coo_v(p + 3) = -hydhx;
277:         if (j + 1 != mx - 1) coo_v(p + 4) = -hxdhy;
278:       }
279:     }));
280:   PetscCall(MatSetValuesCOO(jacpre, coo_v.data(), INSERT_VALUES));
281:   PetscCall(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv));
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: #else
286:   #include "ex55.h"

288: PetscErrorCode FormObjectiveLocalVec(DMDALocalInfo *info, Vec x, PetscReal *obj, AppCtx *user)
289: {
290:   PetscFunctionBeginUser;
291:   PetscCheck(PETSC_FALSE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Need to reconfigure with --download-kokkos-kernels");
292:   PetscFunctionReturn(PETSC_SUCCESS);
293: }

295: PetscErrorCode FormFunctionLocalVec(DMDALocalInfo *info, Vec x, Vec f, AppCtx *user)
296: {
297:   PetscFunctionBeginUser;
298:   PetscCheck(PETSC_FALSE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Need to reconfigure with --download-kokkos-kernels");
299:   PetscFunctionReturn(PETSC_SUCCESS);
300: }

302: PetscErrorCode FormJacobianLocalVec(DMDALocalInfo *info, Vec x, Mat jac, Mat jacpre, AppCtx *user)
303: {
304:   PetscFunctionBeginUser;
305:   PetscCheck(PETSC_FALSE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Need to reconfigure with --download-kokkos-kernels");
306:   PetscFunctionReturn(PETSC_SUCCESS);
307: }
308: #endif

310: /*TEST

312:     build:
313:       TODO:

315: TEST*/