Actual source code: veckok.kokkos.cxx

  1: /*
  2:    Implements the sequential Kokkos vectors.
  3: */
  4: #include <petscvec_kokkos.hpp>

  6: #include <petsc/private/sfimpl.h>
  7: #include <petsc/private/petscimpl.h>
  8: #include <petscmath.h>
  9: #include <petscviewer.h>
 10: #include <KokkosBlas.hpp>
 11: #include <Kokkos_Functional.hpp>

 13: #include <petscerror.h>
 14: #include <../src/vec/vec/impls/dvecimpl.h>
 15: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>

 17: template <class MemorySpace>
 18: PetscErrorCode VecGetKokkosView_Private(Vec v, PetscScalarKokkosViewType<MemorySpace> *kv, PetscBool overwrite)
 19: {
 20:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

 22:   PetscFunctionBegin;
 23:   VecErrorIfNotKokkos(v);
 24:   if (!overwrite) veckok->v_dual.sync<MemorySpace>(); /* If overwrite=true, no need to sync the space, since caller will overwrite the data */
 25:   *kv = veckok->v_dual.view<MemorySpace>();
 26:   PetscFunctionReturn(PETSC_SUCCESS);
 27: }

 29: template <class MemorySpace>
 30: PetscErrorCode VecRestoreKokkosView_Private(Vec v, PetscScalarKokkosViewType<MemorySpace> *kv, PetscBool overwrite)
 31: {
 32:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

 34:   PetscFunctionBegin;
 35:   VecErrorIfNotKokkos(v);
 36:   if (overwrite) veckok->v_dual.clear_sync_state(); /* If overwrite=true, clear the old sync state since user forced an overwrite */
 37:   veckok->v_dual.modify<MemorySpace>();
 38:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 39:   PetscFunctionReturn(PETSC_SUCCESS);
 40: }

 42: template <class MemorySpace>
 43: PetscErrorCode VecGetKokkosView(Vec v, ConstPetscScalarKokkosViewType<MemorySpace> *kv)
 44: {
 45:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);
 46:   PetscFunctionBegin;
 47:   VecErrorIfNotKokkos(v);
 48:   veckok->v_dual.sync<MemorySpace>(); /* Sync the space for caller to read */
 49:   *kv = veckok->v_dual.view<MemorySpace>();
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }

 53: /* Function template explicit instantiation */
 54: template PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView(Vec, ConstPetscScalarKokkosView *);
 55: template <>
 56: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView(Vec v, PetscScalarKokkosView *kv)
 57: {
 58:   return VecGetKokkosView_Private(v, kv, PETSC_FALSE);
 59: }
 60: template <>
 61: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosView(Vec v, PetscScalarKokkosView *kv)
 62: {
 63:   return VecRestoreKokkosView_Private(v, kv, PETSC_FALSE);
 64: }
 65: template <>
 66: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosViewWrite(Vec v, PetscScalarKokkosView *kv)
 67: {
 68:   return VecGetKokkosView_Private(v, kv, PETSC_TRUE);
 69: }
 70: template <>
 71: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosViewWrite(Vec v, PetscScalarKokkosView *kv)
 72: {
 73:   return VecRestoreKokkosView_Private(v, kv, PETSC_TRUE);
 74: }

 76: #if !defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST) /* Get host views if the default memory space is not host space */
 77: template PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView(Vec, ConstPetscScalarKokkosViewHost *);
 78: template <>
 79: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView(Vec v, PetscScalarKokkosViewHost *kv)
 80: {
 81:   return VecGetKokkosView_Private(v, kv, PETSC_FALSE);
 82: }
 83: template <>
 84: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosView(Vec v, PetscScalarKokkosViewHost *kv)
 85: {
 86:   return VecRestoreKokkosView_Private(v, kv, PETSC_FALSE);
 87: }
 88: template <>
 89: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosViewWrite(Vec v, PetscScalarKokkosViewHost *kv)
 90: {
 91:   return VecGetKokkosView_Private(v, kv, PETSC_TRUE);
 92: }
 93: template <>
 94: PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosViewWrite(Vec v, PetscScalarKokkosViewHost *kv)
 95: {
 96:   return VecRestoreKokkosView_Private(v, kv, PETSC_TRUE);
 97: }
 98: #endif

100: PetscErrorCode VecSetRandom_SeqKokkos(Vec xin, PetscRandom r)
101: {
102:   const PetscInt n = xin->map->n;
103:   PetscScalar   *xx;

105:   PetscFunctionBegin;
106:   PetscCall(VecGetArrayWrite(xin, &xx)); /* TODO: generate randoms directly on device */
107:   for (PetscInt i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
108:   PetscCall(VecRestoreArrayWrite(xin, &xx));
109:   PetscFunctionReturn(PETSC_SUCCESS);
110: }

112: /* x = |x| */
113: PetscErrorCode VecAbs_SeqKokkos(Vec xin)
114: {
115:   PetscScalarKokkosView xv;

117:   PetscFunctionBegin;
118:   PetscCall(PetscLogGpuTimeBegin());
119:   PetscCall(VecGetKokkosView(xin, &xv));
120:   KokkosBlas::abs(xv, xv);
121:   PetscCall(VecRestoreKokkosView(xin, &xv));
122:   PetscCall(PetscLogGpuTimeEnd());
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }

126: /* x = 1/x */
127: PetscErrorCode VecReciprocal_SeqKokkos(Vec xin)
128: {
129:   PetscScalarKokkosView xv;

131:   PetscFunctionBegin;
132:   PetscCall(PetscLogGpuTimeBegin());
133:   PetscCall(VecGetKokkosView(xin, &xv));
134:   Kokkos::parallel_for(
135:     xv.extent(0), KOKKOS_LAMBDA(const int64_t i) {
136:       if (xv(i) != (PetscScalar)0.0) xv(i) = (PetscScalar)1.0 / xv(i);
137:     });
138:   PetscCall(VecRestoreKokkosView(xin, &xv));
139:   PetscCall(PetscLogGpuTimeEnd());
140:   PetscFunctionReturn(PETSC_SUCCESS);
141: }

143: template <template <typename...> class FunctorType_, template <typename...> class CompareType_, typename IndexType, typename ScalarType>
144: static PetscErrorCode VecMinMax_SeqKokkos_Private(Vec xin, IndexType *p, ScalarType *val, const char name[])
145: {
146:   using FunctorType = FunctorType_<ScalarType, IndexType>;
147:   using CompareType = CompareType_<ScalarType>;
148:   using ResultType  = typename FunctorType::value_type;
149:   ResultType                 result;
150:   ConstPetscScalarKokkosView xv;

152:   PetscFunctionBegin;
153:   PetscCall(PetscLogGpuTimeBegin());
154:   PetscCall(VecGetKokkosView(xin, &xv));
155:   Kokkos::parallel_reduce(
156:     name, xin->map->n,
157:     KOKKOS_LAMBDA(IndexType i, ResultType & loc) {
158:       if (CompareType{}(PetscRealPart(xv(i)), loc.val)) {
159:         loc.val = PetscRealPart(xv(i));
160:         loc.loc = i;
161:       }
162:     },
163:     FunctorType{result}); /* Kokkos will set minloc properly even if xin is zero-lengthed */
164:   if (p) *p = result.loc;
165:   *val = result.val;
166:   PetscCall(VecRestoreKokkosView(xin, &xv));
167:   PetscCall(PetscLogGpuTimeEnd());
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }

171: PetscErrorCode VecMin_SeqKokkos(Vec xin, PetscInt *p, PetscReal *val)
172: {
173:   PetscFunctionBegin;
174:   PetscCall(VecMinMax_SeqKokkos_Private<Kokkos::MinLoc, Kokkos::less>(xin, p, val, "VecMin"));
175:   PetscFunctionReturn(PETSC_SUCCESS);
176: }

178: PetscErrorCode VecMax_SeqKokkos(Vec xin, PetscInt *p, PetscReal *val)
179: {
180:   PetscFunctionBegin;
181:   PetscCall(VecMinMax_SeqKokkos_Private<Kokkos::MaxLoc, Kokkos::greater>(xin, p, val, "VecMax"));
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: PetscErrorCode VecSum_SeqKokkos(Vec xin, PetscScalar *sum)
186: {
187:   ConstPetscScalarKokkosView xv;

189:   PetscFunctionBegin;
190:   PetscCall(PetscLogGpuTimeBegin());
191:   PetscCall(VecGetKokkosView(xin, &xv));
192:   *sum = KokkosBlas::sum(xv);
193:   PetscCall(VecRestoreKokkosView(xin, &xv));
194:   PetscCall(PetscLogGpuTimeEnd());
195:   PetscFunctionReturn(PETSC_SUCCESS);
196: }

198: PetscErrorCode VecShift_SeqKokkos(Vec xin, PetscScalar shift)
199: {
200:   PetscScalarKokkosView xv;

202:   PetscFunctionBegin;
203:   PetscCall(PetscLogGpuTimeBegin());
204:   PetscCall(VecGetKokkosView(xin, &xv));
205:   Kokkos::parallel_for(
206:     "VecShift", xin->map->n, KOKKOS_LAMBDA(PetscInt i) { xv(i) += shift; });
207:   PetscCall(VecRestoreKokkosView(xin, &xv));
208:   PetscCall(PetscLogGpuTimeEnd());
209:   PetscFunctionReturn(PETSC_SUCCESS);
210: }

212: /* y = alpha x + y */
213: PetscErrorCode VecAXPY_SeqKokkos(Vec yin, PetscScalar alpha, Vec xin)
214: {
215:   PetscFunctionBegin;
216:   if (alpha == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
217:   if (yin == xin) {
218:     PetscCall(VecScale_SeqKokkos(yin, alpha + 1));
219:   } else {
220:     PetscBool xiskok, yiskok;

222:     PetscCall(PetscObjectTypeCompareAny((PetscObject)xin, &xiskok, VECSEQKOKKOS, VECMPIKOKKOS, ""));
223:     PetscCall(PetscObjectTypeCompareAny((PetscObject)yin, &yiskok, VECSEQKOKKOS, VECMPIKOKKOS, ""));
224:     if (xiskok && yiskok) {
225:       PetscScalarKokkosView      yv;
226:       ConstPetscScalarKokkosView xv;

228:       PetscCall(PetscLogGpuTimeBegin());
229:       PetscCall(VecGetKokkosView(xin, &xv));
230:       PetscCall(VecGetKokkosView(yin, &yv));
231:       KokkosBlas::axpy(alpha, xv, yv);
232:       PetscCall(VecRestoreKokkosView(xin, &xv));
233:       PetscCall(VecRestoreKokkosView(yin, &yv));
234:       PetscCall(PetscLogGpuTimeEnd());
235:       PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
236:     } else {
237:       PetscCall(VecAXPY_Seq(yin, alpha, xin));
238:     }
239:   }
240:   PetscFunctionReturn(PETSC_SUCCESS);
241: }

243: /* y = x + beta y */
244: PetscErrorCode VecAYPX_SeqKokkos(Vec yin, PetscScalar beta, Vec xin)
245: {
246:   PetscFunctionBegin;
247:   /* One needs to define KOKKOSBLAS_OPTIMIZATION_LEVEL_AXPBY > 2 to have optimizations for cases alpha/beta = 0,+/-1 */
248:   PetscCall(VecAXPBY_SeqKokkos(yin, 1.0, beta, xin));
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }

252: /* z = y^T x */
253: PetscErrorCode VecTDot_SeqKokkos(Vec xin, Vec yin, PetscScalar *z)
254: {
255:   ConstPetscScalarKokkosView xv, yv;

257:   PetscFunctionBegin;
258:   PetscCall(PetscLogGpuTimeBegin());
259:   PetscCall(VecGetKokkosView(xin, &xv));
260:   PetscCall(VecGetKokkosView(yin, &yv));
261:   Kokkos::parallel_reduce(
262:     "VecTDot", xin->map->n, KOKKOS_LAMBDA(int64_t i, PetscScalar & update) { update += yv(i) * xv(i); }, *z); /* Kokkos always overwrites z, so no need to init it */
263:   PetscCall(VecRestoreKokkosView(yin, &yv));
264:   PetscCall(VecRestoreKokkosView(xin, &xv));
265:   PetscCall(PetscLogGpuTimeEnd());
266:   if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n));
267:   PetscFunctionReturn(PETSC_SUCCESS);
268: }

270: struct TransposeDotTag { };
271: struct ConjugateDotTag { };

273: struct MDotFunctor {
274:   /* Note the C++ notation for an array typedef */
275:   // noted, thanks
276:   typedef PetscScalar                           value_type[];
277:   typedef ConstPetscScalarKokkosView::size_type size_type;

279:   /* Tell Kokkos the result array's number of entries. This must be a public value in the functor */
280:   const size_type            value_count;
281:   ConstPetscScalarKokkosView xv, yv[8];

283:   MDotFunctor(ConstPetscScalarKokkosView &xv, const PetscInt ny, /* Number of valid entries in yv[8]. 1 <= ny <= 8 */
284:               ConstPetscScalarKokkosView &yv0, ConstPetscScalarKokkosView &yv1, ConstPetscScalarKokkosView &yv2, ConstPetscScalarKokkosView &yv3, ConstPetscScalarKokkosView &yv4, ConstPetscScalarKokkosView &yv5, ConstPetscScalarKokkosView &yv6, ConstPetscScalarKokkosView &yv7) :
285:     value_count(ny), xv(xv)
286:   {
287:     yv[0] = yv0;
288:     yv[1] = yv1;
289:     yv[2] = yv2;
290:     yv[3] = yv3;
291:     yv[4] = yv4;
292:     yv[5] = yv5;
293:     yv[6] = yv6;
294:     yv[7] = yv7;
295:   }

297:   KOKKOS_INLINE_FUNCTION void operator()(TransposeDotTag, const size_type i, value_type sum) const
298:   {
299:     PetscScalar xval = xv(i);
300:     for (size_type j = 0; j < value_count; ++j) sum[j] += yv[j](i) * xval;
301:   }

303:   KOKKOS_INLINE_FUNCTION void operator()(ConjugateDotTag, const size_type i, value_type sum) const
304:   {
305:     PetscScalar xval = xv(i);
306:     for (size_type j = 0; j < value_count; ++j) sum[j] += PetscConj(yv[j](i)) * xval;
307:   }

309:   // Per https://kokkos.github.io/kokkos-core-wiki/API/core/parallel-dispatch/parallel_reduce.html#requirements
310:   // "when specifying a tag in the policy, the functor's potential init/join/final member functions must also be tagged"
311:   // So we have this kind of duplicated code.
312:   KOKKOS_INLINE_FUNCTION void join(TransposeDotTag, value_type dst, const value_type src) const { join(dst, src); }
313:   KOKKOS_INLINE_FUNCTION void join(ConjugateDotTag, value_type dst, const value_type src) const { join(dst, src); }

315:   KOKKOS_INLINE_FUNCTION void init(TransposeDotTag, value_type sum) const { init(sum); }
316:   KOKKOS_INLINE_FUNCTION void init(ConjugateDotTag, value_type sum) const { init(sum); }

318:   KOKKOS_INLINE_FUNCTION void join(value_type dst, const value_type src) const
319:   {
320:     for (size_type j = 0; j < value_count; j++) dst[j] += src[j];
321:   }

323:   KOKKOS_INLINE_FUNCTION void init(value_type sum) const
324:   {
325:     for (size_type j = 0; j < value_count; j++) sum[j] = 0.0;
326:   }
327: };

329: template <class WorkTag>
330: PetscErrorCode VecMultiDot_Private(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
331: {
332:   PetscInt                   i, j, cur = 0, ngroup = nv / 8, rem = nv % 8, N = xin->map->n;
333:   ConstPetscScalarKokkosView xv, yv[8];
334:   PetscScalarKokkosViewHost  zv(z, nv);

336:   PetscFunctionBegin;
337:   PetscCall(VecGetKokkosView(xin, &xv));
338:   for (i = 0; i < ngroup; i++) { /* 8 y's per group */
339:     for (j = 0; j < 8; j++) PetscCall(VecGetKokkosView(yin[cur + j], &yv[j]));
340:     MDotFunctor mdot(xv, 8, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]); /* Hope Kokkos make it asynchronous */
341:     Kokkos::parallel_reduce(Kokkos::RangePolicy<WorkTag>(0, N), mdot, Kokkos::subview(zv, Kokkos::pair<PetscInt, PetscInt>(cur, cur + 8)));
342:     for (j = 0; j < 8; j++) PetscCall(VecRestoreKokkosView(yin[cur + j], &yv[j]));
343:     cur += 8;
344:   }

346:   if (rem) { /* The remaining */
347:     for (j = 0; j < rem; j++) PetscCall(VecGetKokkosView(yin[cur + j], &yv[j]));
348:     MDotFunctor mdot(xv, rem, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]);
349:     Kokkos::parallel_reduce(Kokkos::RangePolicy<WorkTag>(0, N), mdot, Kokkos::subview(zv, Kokkos::pair<PetscInt, PetscInt>(cur, cur + rem)));
350:     for (j = 0; j < rem; j++) PetscCall(VecRestoreKokkosView(yin[cur + j], &yv[j]));
351:   }
352:   PetscCall(VecRestoreKokkosView(xin, &xv));
353:   Kokkos::fence(); /* If reduce is async, then we need this fence to make sure z is ready for use on host */
354:   PetscFunctionReturn(PETSC_SUCCESS);
355: }

357: /* z[i] = (x,y_i) = y_i^H x */
358: PetscErrorCode VecMDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
359: {
360:   PetscFunctionBegin;
361:   PetscCall(PetscLogGpuTimeBegin());
362:   PetscCall(VecMultiDot_Private<ConjugateDotTag>(xin, nv, yin, z));
363:   PetscCall(PetscLogGpuTimeEnd());
364:   PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0)));
365:   PetscFunctionReturn(PETSC_SUCCESS);
366: }

368: /* z[i] = (x,y_i) = y_i^T x */
369: PetscErrorCode VecMTDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
370: {
371:   PetscFunctionBegin;
372:   PetscCall(PetscLogGpuTimeBegin());
373:   PetscCall(VecMultiDot_Private<TransposeDotTag>(xin, nv, yin, z));
374:   PetscCall(PetscLogGpuTimeEnd());
375:   PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0)));
376:   PetscFunctionReturn(PETSC_SUCCESS);
377: }

379: /* x[:] = alpha */
380: PetscErrorCode VecSet_SeqKokkos(Vec xin, PetscScalar alpha)
381: {
382:   PetscScalarKokkosView xv;

384:   PetscFunctionBegin;
385:   PetscCall(PetscLogGpuTimeBegin());
386:   PetscCall(VecGetKokkosViewWrite(xin, &xv));
387:   KokkosBlas::fill(xv, alpha);
388:   PetscCall(VecRestoreKokkosViewWrite(xin, &xv));
389:   PetscCall(PetscLogGpuTimeEnd());
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }

393: /* x = alpha x */
394: PetscErrorCode VecScale_SeqKokkos(Vec xin, PetscScalar alpha)
395: {
396:   PetscFunctionBegin;
397:   if (alpha == (PetscScalar)0.0) {
398:     PetscCall(VecSet_SeqKokkos(xin, alpha));
399:   } else if (alpha != (PetscScalar)1.0) {
400:     PetscScalarKokkosView xv;

402:     PetscCall(PetscLogGpuTimeBegin());
403:     PetscCall(VecGetKokkosView(xin, &xv));
404:     KokkosBlas::scal(xv, alpha, xv);
405:     PetscCall(VecRestoreKokkosView(xin, &xv));
406:     PetscCall(PetscLogGpuTimeEnd());
407:     PetscCall(PetscLogGpuFlops(xin->map->n));
408:   }
409:   PetscFunctionReturn(PETSC_SUCCESS);
410: }

412: /* z = y^H x */
413: PetscErrorCode VecDot_SeqKokkos(Vec xin, Vec yin, PetscScalar *z)
414: {
415:   ConstPetscScalarKokkosView xv, yv;

417:   PetscFunctionBegin;
418:   PetscCall(PetscLogGpuTimeBegin());
419:   PetscCall(VecGetKokkosView(xin, &xv));
420:   PetscCall(VecGetKokkosView(yin, &yv));
421:   *z = KokkosBlas::dot(yv, xv); /* KokkosBlas::dot(a,b) takes conjugate of a */
422:   PetscCall(VecRestoreKokkosView(xin, &xv));
423:   PetscCall(VecRestoreKokkosView(yin, &yv));
424:   PetscCall(PetscLogGpuTimeEnd());
425:   PetscCall(PetscLogGpuFlops(PetscMax(2.0 * xin->map->n - 1, 0.0)));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }

429: /* y = x, where x is VECKOKKOS, but y may be not */
430: PetscErrorCode VecCopy_SeqKokkos(Vec xin, Vec yin)
431: {
432:   PetscFunctionBegin;
433:   PetscCall(PetscLogGpuTimeBegin());
434:   if (xin != yin) {
435:     Vec_Kokkos *xkok = static_cast<Vec_Kokkos *>(xin->spptr);
436:     if (yin->offloadmask == PETSC_OFFLOAD_KOKKOS) {
437:       /* y is also a VecKokkos */
438:       Vec_Kokkos *ykok = static_cast<Vec_Kokkos *>(yin->spptr);
439:       /* Kokkos rule: if x's host has newer data, it will copy to y's host view; otherwise to y's device view
440:         In case x's host is newer, y's device is newer, it will error (though should not, I think). So we just
441:         clear y's sync state.
442:        */
443:       ykok->v_dual.clear_sync_state();
444:       Kokkos::deep_copy(ykok->v_dual, xkok->v_dual);
445:     } else {
446:       PetscScalar *yarray;
447:       PetscCall(VecGetArrayWrite(yin, &yarray));
448:       PetscScalarKokkosViewHost yv(yarray, yin->map->n);
449:       if (xkok->v_dual.need_sync_host()) { /* x's device has newer data */
450:         Kokkos::deep_copy(yv, xkok->v_dual.view_device());
451:       } else {
452:         Kokkos::deep_copy(yv, xkok->v_dual.view_host());
453:       }
454:       PetscCall(VecRestoreArrayWrite(yin, &yarray));
455:     }
456:   }
457:   PetscCall(PetscLogGpuTimeEnd());
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

461: /* y[i] <--> x[i] */
462: PetscErrorCode VecSwap_SeqKokkos(Vec xin, Vec yin)
463: {
464:   PetscFunctionBegin;
465:   if (xin != yin) {
466:     PetscScalarKokkosView xv, yv;

468:     PetscCall(PetscLogGpuTimeBegin());
469:     PetscCall(VecGetKokkosView(xin, &xv));
470:     PetscCall(VecGetKokkosView(yin, &yv));
471:     Kokkos::parallel_for(
472:       xin->map->n, KOKKOS_LAMBDA(const int64_t i) {
473:         PetscScalar tmp = xv(i);
474:         xv(i)           = yv(i);
475:         yv(i)           = tmp;
476:       });
477:     PetscCall(VecRestoreKokkosView(xin, &xv));
478:     PetscCall(VecRestoreKokkosView(yin, &yv));
479:     PetscCall(PetscLogGpuTimeEnd());
480:   }
481:   PetscFunctionReturn(PETSC_SUCCESS);
482: }

484: /*  w = alpha x + y */
485: PetscErrorCode VecWAXPY_SeqKokkos(Vec win, PetscScalar alpha, Vec xin, Vec yin)
486: {
487:   PetscFunctionBegin;
488:   if (alpha == (PetscScalar)0.0) {
489:     PetscCall(VecCopy_SeqKokkos(yin, win));
490:   } else {
491:     ConstPetscScalarKokkosView xv, yv;
492:     PetscScalarKokkosView      wv;

494:     PetscCall(PetscLogGpuTimeBegin());
495:     PetscCall(VecGetKokkosViewWrite(win, &wv));
496:     PetscCall(VecGetKokkosView(xin, &xv));
497:     PetscCall(VecGetKokkosView(yin, &yv));
498:     Kokkos::parallel_for(
499:       win->map->n, KOKKOS_LAMBDA(const int64_t i) { wv(i) = alpha * xv(i) + yv(i); });
500:     PetscCall(VecRestoreKokkosView(xin, &xv));
501:     PetscCall(VecRestoreKokkosView(yin, &yv));
502:     PetscCall(VecRestoreKokkosViewWrite(win, &wv));
503:     PetscCall(PetscLogGpuTimeEnd());
504:     PetscCall(PetscLogGpuFlops(2.0 * win->map->n));
505:   }
506:   PetscFunctionReturn(PETSC_SUCCESS);
507: }

509: struct MAXPYFunctor {
510:   typedef ConstPetscScalarKokkosView::size_type size_type;

512:   PetscScalarKokkosView      yv;
513:   PetscInt                   nx; /* Significent entries in a[8] and xv[8] */
514:   PetscScalar                a[8];
515:   ConstPetscScalarKokkosView xv[8];

517:   MAXPYFunctor(PetscScalarKokkosView yv, PetscInt nx, PetscScalar a0, PetscScalar a1, PetscScalar a2, PetscScalar a3, PetscScalar a4, PetscScalar a5, PetscScalar a6, PetscScalar a7, ConstPetscScalarKokkosView xv0, ConstPetscScalarKokkosView xv1, ConstPetscScalarKokkosView xv2, ConstPetscScalarKokkosView xv3, ConstPetscScalarKokkosView xv4, ConstPetscScalarKokkosView xv5, ConstPetscScalarKokkosView xv6, ConstPetscScalarKokkosView xv7) :
518:     yv(yv), nx(nx)
519:   {
520:     a[0]  = a0;
521:     a[1]  = a1;
522:     a[2]  = a2;
523:     a[3]  = a3;
524:     a[4]  = a4;
525:     a[5]  = a5;
526:     a[6]  = a6;
527:     a[7]  = a7;
528:     xv[0] = xv0;
529:     xv[1] = xv1;
530:     xv[2] = xv2;
531:     xv[3] = xv3;
532:     xv[4] = xv4;
533:     xv[5] = xv5;
534:     xv[6] = xv6;
535:     xv[7] = xv7;
536:   }

538:   KOKKOS_INLINE_FUNCTION void operator()(const size_type i) const
539:   {
540:     for (PetscInt j = 0; j < nx; ++j) yv(i) += a[j] * xv[j](i);
541:   }
542: };

544: /*  y = y + sum alpha[i] x[i] */
545: PetscErrorCode VecMAXPY_SeqKokkos(Vec yin, PetscInt nv, const PetscScalar *alpha, Vec *xin)
546: {
547:   PetscInt                   i, j, cur = 0, ngroup = nv / 8, rem = nv % 8;
548:   PetscScalarKokkosView      yv;
549:   PetscScalar                a[8];
550:   ConstPetscScalarKokkosView xv[8];

552:   PetscFunctionBegin;
553:   PetscCall(PetscLogGpuTimeBegin());
554:   PetscCall(VecGetKokkosView(yin, &yv));
555:   for (i = 0; i < ngroup; i++) { /* 8 x's per group */
556:     for (j = 0; j < 8; j++) {    /* Fill the parameters */
557:       a[j] = alpha[cur + j];
558:       PetscCall(VecGetKokkosView(xin[cur + j], &xv[j]));
559:     }
560:     MAXPYFunctor maxpy(yv, 8, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], xv[0], xv[1], xv[2], xv[3], xv[4], xv[5], xv[6], xv[7]);
561:     Kokkos::parallel_for(yin->map->n, maxpy);
562:     for (j = 0; j < 8; j++) PetscCall(VecRestoreKokkosView(xin[cur + j], &xv[j]));
563:     cur += 8;
564:   }

566:   if (rem) { /* The remaining */
567:     for (j = 0; j < rem; j++) {
568:       a[j] = alpha[cur + j];
569:       PetscCall(VecGetKokkosView(xin[cur + j], &xv[j]));
570:     }
571:     MAXPYFunctor maxpy(yv, rem, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], xv[0], xv[1], xv[2], xv[3], xv[4], xv[5], xv[6], xv[7]);
572:     Kokkos::parallel_for(yin->map->n, maxpy);
573:     for (j = 0; j < rem; j++) PetscCall(VecRestoreKokkosView(xin[cur + j], &xv[j]));
574:   }
575:   PetscCall(VecRestoreKokkosView(yin, &yv));
576:   PetscCall(PetscLogGpuTimeEnd());
577:   PetscCall(PetscLogGpuFlops(nv * 2.0 * yin->map->n));
578:   PetscFunctionReturn(PETSC_SUCCESS);
579: }

581: /* y = alpha x + beta y */
582: PetscErrorCode VecAXPBY_SeqKokkos(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
583: {
584:   PetscBool xiskok, yiskok;

586:   PetscFunctionBegin;
587:   PetscCall(PetscObjectTypeCompareAny((PetscObject)xin, &xiskok, VECSEQKOKKOS, VECMPIKOKKOS, ""));
588:   PetscCall(PetscObjectTypeCompareAny((PetscObject)yin, &yiskok, VECSEQKOKKOS, VECMPIKOKKOS, ""));
589:   if (xiskok && yiskok) {
590:     ConstPetscScalarKokkosView xv;
591:     PetscScalarKokkosView      yv;

593:     PetscCall(PetscLogGpuTimeBegin());
594:     PetscCall(VecGetKokkosView(xin, &xv));
595:     PetscCall(VecGetKokkosView(yin, &yv));
596:     KokkosBlas::axpby(alpha, xv, beta, yv);
597:     PetscCall(VecRestoreKokkosView(xin, &xv));
598:     PetscCall(VecRestoreKokkosView(yin, &yv));
599:     PetscCall(PetscLogGpuTimeEnd());
600:     if (alpha == (PetscScalar)0.0 || beta == (PetscScalar)0.0) {
601:       PetscCall(PetscLogGpuFlops(xin->map->n));
602:     } else if (beta == (PetscScalar)1.0 || alpha == (PetscScalar)1.0) {
603:       PetscCall(PetscLogGpuFlops(2.0 * xin->map->n));
604:     } else {
605:       PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
606:     }
607:   } else {
608:     PetscCall(VecAXPBY_Seq(yin, alpha, beta, xin));
609:   }
610:   PetscFunctionReturn(PETSC_SUCCESS);
611: }

613: /* z = alpha x + beta y + gamma z */
614: PetscErrorCode VecAXPBYPCZ_SeqKokkos(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
615: {
616:   ConstPetscScalarKokkosView xv, yv;
617:   PetscScalarKokkosView      zv;

619:   PetscFunctionBegin;
620:   PetscCall(PetscLogGpuTimeBegin());
621:   PetscCall(VecGetKokkosView(zin, &zv));
622:   PetscCall(VecGetKokkosView(xin, &xv));
623:   PetscCall(VecGetKokkosView(yin, &yv));
624:   if (gamma == (PetscScalar)0.0) { // a common case
625:     if (alpha == -beta) {
626:       PetscCallCXX(Kokkos::parallel_for( // a common case
627:         zin->map->n, KOKKOS_LAMBDA(const PetscInt i) { zv(i) = alpha * (xv(i) - yv(i)); }));
628:     } else {
629:       PetscCallCXX(Kokkos::parallel_for(
630:         zin->map->n, KOKKOS_LAMBDA(const PetscInt i) { zv(i) = alpha * xv(i) + beta * yv(i); }));
631:     }
632:   } else {
633:     PetscCallCXX(KokkosBlas::update(alpha, xv, beta, yv, gamma, zv));
634:   }
635:   PetscCall(VecRestoreKokkosView(xin, &xv));
636:   PetscCall(VecRestoreKokkosView(yin, &yv));
637:   PetscCall(VecRestoreKokkosView(zin, &zv));
638:   PetscCall(PetscLogGpuTimeEnd());
639:   PetscCall(PetscLogGpuFlops(zin->map->n * 5.0));
640:   PetscFunctionReturn(PETSC_SUCCESS);
641: }

643: /* w = x*y. Any subset of the x, y, and w may be the same vector.

645:   w is of type VecKokkos, but x, y may be not.
646: */
647: PetscErrorCode VecPointwiseMult_SeqKokkos(Vec win, Vec xin, Vec yin)
648: {
649:   PetscInt n;

651:   PetscFunctionBegin;
652:   PetscCall(PetscLogGpuTimeBegin());
653:   PetscCall(VecGetLocalSize(win, &n));
654:   if (xin->offloadmask != PETSC_OFFLOAD_KOKKOS || yin->offloadmask != PETSC_OFFLOAD_KOKKOS) {
655:     PetscScalarKokkosViewHost wv;
656:     const PetscScalar        *xp, *yp;
657:     PetscCall(VecGetArrayRead(xin, &xp));
658:     PetscCall(VecGetArrayRead(yin, &yp));
659:     PetscCall(VecGetKokkosViewWrite(win, &wv));

661:     ConstPetscScalarKokkosViewHost xv(xp, n), yv(yp, n);
662:     Kokkos::parallel_for(
663:       Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, n), KOKKOS_LAMBDA(const PetscInt i) { wv(i) = xv(i) * yv(i); });

665:     PetscCall(VecRestoreArrayRead(xin, &xp));
666:     PetscCall(VecRestoreArrayRead(yin, &yp));
667:     PetscCall(VecRestoreKokkosViewWrite(win, &wv));
668:   } else {
669:     ConstPetscScalarKokkosView xv, yv;
670:     PetscScalarKokkosView      wv;

672:     PetscCall(VecGetKokkosViewWrite(win, &wv));
673:     PetscCall(VecGetKokkosView(xin, &xv));
674:     PetscCall(VecGetKokkosView(yin, &yv));
675:     Kokkos::parallel_for(
676:       n, KOKKOS_LAMBDA(const PetscInt i) { wv(i) = xv(i) * yv(i); });
677:     PetscCall(VecRestoreKokkosView(yin, &yv));
678:     PetscCall(VecRestoreKokkosView(xin, &xv));
679:     PetscCall(VecRestoreKokkosViewWrite(win, &wv));
680:   }
681:   PetscCall(PetscLogGpuTimeEnd());
682:   PetscCall(PetscLogGpuFlops(n));
683:   PetscFunctionReturn(PETSC_SUCCESS);
684: }

686: /* w = x/y */
687: PetscErrorCode VecPointwiseDivide_SeqKokkos(Vec win, Vec xin, Vec yin)
688: {
689:   PetscInt n;

691:   PetscFunctionBegin;
692:   PetscCall(PetscLogGpuTimeBegin());
693:   PetscCall(VecGetLocalSize(win, &n));
694:   if (xin->offloadmask != PETSC_OFFLOAD_KOKKOS || yin->offloadmask != PETSC_OFFLOAD_KOKKOS) {
695:     PetscScalarKokkosViewHost wv;
696:     const PetscScalar        *xp, *yp;
697:     PetscCall(VecGetArrayRead(xin, &xp));
698:     PetscCall(VecGetArrayRead(yin, &yp));
699:     PetscCall(VecGetKokkosViewWrite(win, &wv));

701:     ConstPetscScalarKokkosViewHost xv(xp, n), yv(yp, n);
702:     Kokkos::parallel_for(
703:       Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, n), KOKKOS_LAMBDA(const PetscInt i) {
704:         if (yv(i) != 0.0) wv(i) = xv(i) / yv(i);
705:         else wv(i) = 0.0;
706:       });

708:     PetscCall(VecRestoreArrayRead(xin, &xp));
709:     PetscCall(VecRestoreArrayRead(yin, &yp));
710:     PetscCall(VecRestoreKokkosViewWrite(win, &wv));
711:   } else {
712:     ConstPetscScalarKokkosView xv, yv;
713:     PetscScalarKokkosView      wv;

715:     PetscCall(VecGetKokkosViewWrite(win, &wv));
716:     PetscCall(VecGetKokkosView(xin, &xv));
717:     PetscCall(VecGetKokkosView(yin, &yv));
718:     Kokkos::parallel_for(
719:       n, KOKKOS_LAMBDA(const PetscInt i) {
720:         if (yv(i) != 0.0) wv(i) = xv(i) / yv(i);
721:         else wv(i) = 0.0;
722:       });
723:     PetscCall(VecRestoreKokkosView(yin, &yv));
724:     PetscCall(VecRestoreKokkosView(xin, &xv));
725:     PetscCall(VecRestoreKokkosViewWrite(win, &wv));
726:   }
727:   PetscCall(PetscLogGpuTimeEnd());
728:   PetscCall(PetscLogGpuFlops(win->map->n));
729:   PetscFunctionReturn(PETSC_SUCCESS);
730: }

732: PetscErrorCode VecNorm_SeqKokkos(Vec xin, NormType type, PetscReal *z)
733: {
734:   const PetscInt             n = xin->map->n;
735:   ConstPetscScalarKokkosView xv;

737:   PetscFunctionBegin;
738:   if (type == NORM_1_AND_2) {
739:     PetscCall(VecNorm_SeqKokkos(xin, NORM_1, z));
740:     PetscCall(VecNorm_SeqKokkos(xin, NORM_2, z + 1));
741:   } else {
742:     PetscCall(PetscLogGpuTimeBegin());
743:     PetscCall(VecGetKokkosView(xin, &xv));
744:     if (type == NORM_2 || type == NORM_FROBENIUS) {
745:       *z = KokkosBlas::nrm2(xv);
746:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
747:     } else if (type == NORM_1) {
748:       *z = KokkosBlas::nrm1(xv);
749:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
750:     } else if (type == NORM_INFINITY) {
751:       *z = KokkosBlas::nrminf(xv);
752:     }
753:     PetscCall(VecRestoreKokkosView(xin, &xv));
754:     PetscCall(PetscLogGpuTimeEnd());
755:   }
756:   PetscFunctionReturn(PETSC_SUCCESS);
757: }

759: /* A functor for DotNorm2 so that we can compute dp and nm in one kernel */
760: struct DotNorm2 {
761:   typedef PetscScalar                           value_type[];
762:   typedef ConstPetscScalarKokkosView::size_type size_type;

764:   size_type                  value_count;
765:   ConstPetscScalarKokkosView xv_, yv_; /* first and second vectors in VecDotNorm2. The order matters. */

767:   DotNorm2(ConstPetscScalarKokkosView &xv, ConstPetscScalarKokkosView &yv) : value_count(2), xv_(xv), yv_(yv) { }

769:   KOKKOS_INLINE_FUNCTION void operator()(const size_type i, value_type result) const
770:   {
771:     result[0] += PetscConj(yv_(i)) * xv_(i);
772:     result[1] += PetscConj(yv_(i)) * yv_(i);
773:   }

775:   KOKKOS_INLINE_FUNCTION void join(value_type dst, const value_type src) const
776:   {
777:     dst[0] += src[0];
778:     dst[1] += src[1];
779:   }

781:   KOKKOS_INLINE_FUNCTION void init(value_type result) const
782:   {
783:     result[0] = 0.0;
784:     result[1] = 0.0;
785:   }
786: };

788: /* dp = y^H x, nm = y^H y */
789: PetscErrorCode VecDotNorm2_SeqKokkos(Vec xin, Vec yin, PetscScalar *dp, PetscScalar *nm)
790: {
791:   ConstPetscScalarKokkosView xv, yv;
792:   PetscScalar                result[2];

794:   PetscFunctionBegin;
795:   PetscCall(PetscLogGpuTimeBegin());
796:   PetscCall(VecGetKokkosView(xin, &xv));
797:   PetscCall(VecGetKokkosView(yin, &yv));
798:   DotNorm2 dn(xv, yv);
799:   Kokkos::parallel_reduce(xin->map->n, dn, result);
800:   *dp = result[0];
801:   *nm = result[1];
802:   PetscCall(VecRestoreKokkosView(yin, &yv));
803:   PetscCall(VecRestoreKokkosView(xin, &xv));
804:   PetscCall(PetscLogGpuTimeEnd());
805:   PetscCall(PetscLogGpuFlops(4.0 * xin->map->n));
806:   PetscFunctionReturn(PETSC_SUCCESS);
807: }

809: PetscErrorCode VecConjugate_SeqKokkos(Vec xin)
810: {
811: #if defined(PETSC_USE_COMPLEX)
812:   PetscScalarKokkosView xv;

814:   PetscFunctionBegin;
815:   PetscCall(PetscLogGpuTimeBegin());
816:   PetscCall(VecGetKokkosView(xin, &xv));
817:   Kokkos::parallel_for(
818:     xin->map->n, KOKKOS_LAMBDA(int64_t i) { xv(i) = PetscConj(xv(i)); });
819:   PetscCall(VecRestoreKokkosView(xin, &xv));
820:   PetscCall(PetscLogGpuTimeEnd());
821: #else
822:   PetscFunctionBegin;
823: #endif
824:   PetscFunctionReturn(PETSC_SUCCESS);
825: }

827: /* Temporarily replace the array in vin with a[]. Return to the original array with a call to VecResetArray() */
828: PetscErrorCode VecPlaceArray_SeqKokkos(Vec vin, const PetscScalar *a)
829: {
830:   Vec_Seq    *vecseq = (Vec_Seq *)vin->data;
831:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(vin->spptr);

833:   PetscFunctionBegin;
834:   PetscCall(VecPlaceArray_Seq(vin, a));
835:   veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
836:   PetscFunctionReturn(PETSC_SUCCESS);
837: }

839: PetscErrorCode VecResetArray_SeqKokkos(Vec vin)
840: {
841:   Vec_Seq    *vecseq = (Vec_Seq *)vin->data;
842:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(vin->spptr);

844:   PetscFunctionBegin;
845:   veckok->v_dual.sync_host();        /* User wants to unhook the provided host array. Sync it so that user can get the latest */
846:   PetscCall(VecResetArray_Seq(vin)); /* Swap back the old host array, assuming its has the latest value */
847:   veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
848:   PetscFunctionReturn(PETSC_SUCCESS);
849: }

851: /* Replace the array in vin with a[] that must be allocated by PetscMalloc. a[] is owned by vin afterwards. */
852: PetscErrorCode VecReplaceArray_SeqKokkos(Vec vin, const PetscScalar *a)
853: {
854:   Vec_Seq    *vecseq = (Vec_Seq *)vin->data;
855:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(vin->spptr);

857:   PetscFunctionBegin;
858:   /* Make sure the users array has the latest values */
859:   if (vecseq->array != vecseq->array_allocated) veckok->v_dual.sync_host();
860:   PetscCall(VecReplaceArray_Seq(vin, a));
861:   veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
862:   PetscFunctionReturn(PETSC_SUCCESS);
863: }

865: /* Maps the local portion of vector v into vector w */
866: PetscErrorCode VecGetLocalVector_SeqKokkos(Vec v, Vec w)
867: {
868:   Vec_Seq    *vecseq = static_cast<Vec_Seq *>(w->data);
869:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(w->spptr);

871:   PetscFunctionBegin;
872:   PetscCheckTypeNames(w, VECSEQKOKKOS, VECMPIKOKKOS);
873:   /* Destroy w->data, w->spptr */
874:   if (vecseq) {
875:     PetscCall(PetscFree(vecseq->array_allocated));
876:     PetscCall(PetscFree(w->data));
877:   }
878:   delete veckok;

880:   /* Replace with v's */
881:   w->data  = v->data;
882:   w->spptr = v->spptr;
883:   PetscCall(PetscObjectStateIncrease((PetscObject)w));
884:   PetscFunctionReturn(PETSC_SUCCESS);
885: }

887: PetscErrorCode VecRestoreLocalVector_SeqKokkos(Vec v, Vec w)
888: {
889:   PetscFunctionBegin;
890:   PetscCheckTypeNames(w, VECSEQKOKKOS, VECMPIKOKKOS);
891:   v->data  = w->data;
892:   v->spptr = w->spptr;
893:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
894:   /* TODO: need to think if setting w->data/spptr to NULL is safe */
895:   w->data  = NULL;
896:   w->spptr = NULL;
897:   PetscFunctionReturn(PETSC_SUCCESS);
898: }

900: PetscErrorCode VecGetArray_SeqKokkos(Vec v, PetscScalar **a)
901: {
902:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

904:   PetscFunctionBegin;
905:   veckok->v_dual.sync_host();
906:   *a = *((PetscScalar **)v->data);
907:   PetscFunctionReturn(PETSC_SUCCESS);
908: }

910: PetscErrorCode VecRestoreArray_SeqKokkos(Vec v, PetscScalar **a)
911: {
912:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

914:   PetscFunctionBegin;
915:   veckok->v_dual.modify_host();
916:   PetscFunctionReturn(PETSC_SUCCESS);
917: }

919: /* Get array on host to overwrite, so no need to sync host. In VecRestoreArrayWrite() we will mark host is modified. */
920: PetscErrorCode VecGetArrayWrite_SeqKokkos(Vec v, PetscScalar **a)
921: {
922:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

924:   PetscFunctionBegin;
925:   veckok->v_dual.clear_sync_state();
926:   *a = veckok->v_dual.view_host().data();
927:   PetscFunctionReturn(PETSC_SUCCESS);
928: }

930: PetscErrorCode VecGetArrayAndMemType_SeqKokkos(Vec v, PetscScalar **a, PetscMemType *mtype)
931: {
932:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

934:   PetscFunctionBegin;
935:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
936:     *a = veckok->v_dual.view_host().data();
937:     if (mtype) *mtype = PETSC_MEMTYPE_HOST;
938:   } else {
939:     /* When there is device, we always return up-to-date device data */
940:     veckok->v_dual.sync_device();
941:     *a = veckok->v_dual.view_device().data();
942:     if (mtype) *mtype = PETSC_MEMTYPE_KOKKOS;
943:   }
944:   PetscFunctionReturn(PETSC_SUCCESS);
945: }

947: PetscErrorCode VecRestoreArrayAndMemType_SeqKokkos(Vec v, PetscScalar **a)
948: {
949:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

951:   PetscFunctionBegin;
952:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
953:     veckok->v_dual.modify_host();
954:   } else {
955:     veckok->v_dual.modify_device();
956:   }
957:   PetscFunctionReturn(PETSC_SUCCESS);
958: }

960: PetscErrorCode VecGetArrayWriteAndMemType_SeqKokkos(Vec v, PetscScalar **a, PetscMemType *mtype)
961: {
962:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

964:   PetscFunctionBegin;
965:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
966:     *a = veckok->v_dual.view_host().data();
967:     if (mtype) *mtype = PETSC_MEMTYPE_HOST;
968:   } else {
969:     /* When there is device, we always return device data (but no need to sync the device) */
970:     veckok->v_dual.clear_sync_state(); /* So that in restore, we can safely modify_device() */
971:     *a = veckok->v_dual.view_device().data();
972:     if (mtype) *mtype = PETSC_MEMTYPE_KOKKOS;
973:   }
974:   PetscFunctionReturn(PETSC_SUCCESS);
975: }

977: /* Copy xin's sync state to y */
978: static PetscErrorCode VecCopySyncState_Kokkos_Private(Vec xin, Vec yout)
979: {
980:   Vec_Kokkos *xkok = static_cast<Vec_Kokkos *>(xin->spptr);
981:   Vec_Kokkos *ykok = static_cast<Vec_Kokkos *>(yout->spptr);

983:   PetscFunctionBegin;
984:   ykok->v_dual.clear_sync_state();
985:   if (xkok->v_dual.need_sync_host()) {
986:     ykok->v_dual.modify_device();
987:   } else if (xkok->v_dual.need_sync_device()) {
988:     ykok->v_dual.modify_host();
989:   }
990:   PetscFunctionReturn(PETSC_SUCCESS);
991: }

993: /* Internal routine shared by VecGetSubVector_{SeqKokkos,MPIKokkos} */
994: PetscErrorCode VecGetSubVector_Kokkos_Private(Vec x, PetscBool xIsMPI, IS is, Vec *y)
995: {
996:   PetscBool contig;
997:   PetscInt  n, N, start, bs;
998:   MPI_Comm  comm;
999:   Vec       z;

1001:   PetscFunctionBegin;
1002:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
1003:   PetscCall(ISGetLocalSize(is, &n));
1004:   PetscCall(ISGetSize(is, &N));
1005:   PetscCall(VecGetSubVectorContiguityAndBS_Private(x, is, &contig, &start, &bs));

1007:   if (contig) { /* We can do a no-copy (in-place) implementation with y sharing x's arrays */
1008:     Vec_Kokkos        *xkok    = static_cast<Vec_Kokkos *>(x->spptr);
1009:     const PetscScalar *array_h = xkok->v_dual.view_host().data() + start;
1010:     const PetscScalar *array_d = xkok->v_dual.view_device().data() + start;

1012:     /* These calls assume the input arrays are synced */
1013:     if (xIsMPI) PetscCall(VecCreateMPIKokkosWithArrays_Private(comm, bs, n, N, array_h, array_d, &z)); /* x could be MPI even when x's comm size = 1 */
1014:     else PetscCall(VecCreateSeqKokkosWithArrays_Private(comm, bs, n, array_h, array_d, &z));

1016:     PetscCall(VecCopySyncState_Kokkos_Private(x, z)); /* Copy x's sync state to z */

1018:     /* This is relevant only in debug mode */
1019:     PetscInt state = 0;
1020:     PetscCall(VecLockGet(x, &state));
1021:     if (state) { /* x is either in read or read/write mode, therefore z, overlapped with x, can only be in read mode */
1022:       PetscCall(VecLockReadPush(z));
1023:     }

1025:     z->ops->placearray   = NULL; /* z's arrays can't be replaced, because z does not own them */
1026:     z->ops->replacearray = NULL;

1028:   } else { /* Have to create a VecScatter and a stand-alone vector */
1029:     PetscCall(VecGetSubVectorThroughVecScatter_Private(x, is, bs, &z));
1030:   }
1031:   *y = z;
1032:   PetscFunctionReturn(PETSC_SUCCESS);
1033: }

1035: PetscErrorCode VecGetSubVector_SeqKokkos(Vec x, IS is, Vec *y)
1036: {
1037:   PetscFunctionBegin;
1038:   PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_FALSE, is, y));
1039:   PetscFunctionReturn(PETSC_SUCCESS);
1040: }

1042: /* Restore subvector y to x */
1043: PetscErrorCode VecRestoreSubVector_SeqKokkos(Vec x, IS is, Vec *y)
1044: {
1045:   VecScatter                    vscat;
1046:   PETSC_UNUSED PetscObjectState dummystate = 0;
1047:   PetscBool                     unchanged;

1049:   PetscFunctionBegin;
1050:   PetscCall(PetscObjectComposedDataGetInt((PetscObject)*y, VecGetSubVectorSavedStateId, dummystate, unchanged));
1051:   if (unchanged) PetscFunctionReturn(PETSC_SUCCESS); /* If y's state has not changed since VecGetSubVector(), we only need to destroy it */

1053:   PetscCall(PetscObjectQuery((PetscObject)*y, "VecGetSubVector_Scatter", (PetscObject *)&vscat));
1054:   if (vscat) {
1055:     PetscCall(VecScatterBegin(vscat, *y, x, INSERT_VALUES, SCATTER_REVERSE));
1056:     PetscCall(VecScatterEnd(vscat, *y, x, INSERT_VALUES, SCATTER_REVERSE));
1057:   } else { /* y and x's (host and device) arrays overlap */
1058:     Vec_Kokkos *xkok = static_cast<Vec_Kokkos *>(x->spptr);
1059:     Vec_Kokkos *ykok = static_cast<Vec_Kokkos *>((*y)->spptr);
1060:     PetscInt    state;

1062:     PetscCall(VecLockGet(x, &state));
1063:     PetscCheck(!state, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "Vec x is locked for read-only or read/write access");

1065:     /* The tricky part: one has to carefully sync the arrays */
1066:     if (xkok->v_dual.need_sync_device()) {      /* x's host has newer data */
1067:       ykok->v_dual.sync_host();                 /* Move y's latest values to host (since y is just a subset of x) */
1068:     } else if (xkok->v_dual.need_sync_host()) { /* x's device has newer data */
1069:       ykok->v_dual.sync_device();               /* Move y's latest data to device */
1070:     } else {                                    /* x's host and device data is already sync'ed; Copy y's sync state to x */
1071:       PetscCall(VecCopySyncState_Kokkos_Private(*y, x));
1072:     }
1073:     PetscCall(PetscObjectStateIncrease((PetscObject)x)); /* Since x is updated */
1074:   }
1075:   PetscFunctionReturn(PETSC_SUCCESS);
1076: }

1078: static PetscErrorCode VecSetPreallocationCOO_SeqKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
1079: {
1080:   Vec_Seq    *vecseq = static_cast<Vec_Seq *>(x->data);
1081:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(x->spptr);
1082:   PetscInt    m;

1084:   PetscFunctionBegin;
1085:   PetscCall(VecSetPreallocationCOO_Seq(x, ncoo, coo_i));
1086:   PetscCall(VecGetLocalSize(x, &m));
1087:   PetscCallCXX(veckok->SetUpCOO(vecseq, m));
1088:   PetscFunctionReturn(PETSC_SUCCESS);
1089: }

1091: static PetscErrorCode VecSetValuesCOO_SeqKokkos(Vec x, const PetscScalar v[], InsertMode imode)
1092: {
1093:   Vec_Seq                    *vecseq = static_cast<Vec_Seq *>(x->data);
1094:   Vec_Kokkos                 *veckok = static_cast<Vec_Kokkos *>(x->spptr);
1095:   const PetscCountKokkosView &jmap1  = veckok->jmap1_d;
1096:   const PetscCountKokkosView &perm1  = veckok->perm1_d;
1097:   PetscScalarKokkosView       xv; /* View for vector x */
1098:   ConstPetscScalarKokkosView  vv; /* View for array v[] */
1099:   PetscInt                    m;
1100:   PetscMemType                memtype;

1102:   PetscFunctionBegin;
1103:   PetscCall(VecGetLocalSize(x, &m));
1104:   PetscCall(PetscGetMemType(v, &memtype));
1105:   if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we might need to copy it to device if any */
1106:     vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecseq->coo_n));
1107:   } else {
1108:     vv = ConstPetscScalarKokkosView(v, vecseq->coo_n); /* Directly use v[]'s memory */
1109:   }

1111:   if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
1112:   else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */

1114:   Kokkos::parallel_for(
1115:     m, KOKKOS_LAMBDA(const PetscCount i) {
1116:       PetscScalar sum = 0.0;
1117:       for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
1118:       xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
1119:     });

1121:   if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
1122:   else PetscCall(VecRestoreKokkosView(x, &xv));
1123:   PetscFunctionReturn(PETSC_SUCCESS);
1124: }

1126: static PetscErrorCode VecSetOps_SeqKokkos(Vec v)
1127: {
1128:   PetscFunctionBegin;
1129:   v->ops->abs             = VecAbs_SeqKokkos;
1130:   v->ops->reciprocal      = VecReciprocal_SeqKokkos;
1131:   v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
1132:   v->ops->min             = VecMin_SeqKokkos;
1133:   v->ops->max             = VecMax_SeqKokkos;
1134:   v->ops->sum             = VecSum_SeqKokkos;
1135:   v->ops->shift           = VecShift_SeqKokkos;
1136:   v->ops->norm            = VecNorm_SeqKokkos;
1137:   v->ops->scale           = VecScale_SeqKokkos;
1138:   v->ops->copy            = VecCopy_SeqKokkos;
1139:   v->ops->set             = VecSet_SeqKokkos;
1140:   v->ops->swap            = VecSwap_SeqKokkos;
1141:   v->ops->axpy            = VecAXPY_SeqKokkos;
1142:   v->ops->axpby           = VecAXPBY_SeqKokkos;
1143:   v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
1144:   v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
1145:   v->ops->setrandom       = VecSetRandom_SeqKokkos;

1147:   v->ops->dot   = VecDot_SeqKokkos;
1148:   v->ops->tdot  = VecTDot_SeqKokkos;
1149:   v->ops->mdot  = VecMDot_SeqKokkos;
1150:   v->ops->mtdot = VecMTDot_SeqKokkos;

1152:   v->ops->dot_local   = VecDot_SeqKokkos;
1153:   v->ops->tdot_local  = VecTDot_SeqKokkos;
1154:   v->ops->mdot_local  = VecMDot_SeqKokkos;
1155:   v->ops->mtdot_local = VecMTDot_SeqKokkos;

1157:   v->ops->norm_local             = VecNorm_SeqKokkos;
1158:   v->ops->maxpy                  = VecMAXPY_SeqKokkos;
1159:   v->ops->aypx                   = VecAYPX_SeqKokkos;
1160:   v->ops->waxpy                  = VecWAXPY_SeqKokkos;
1161:   v->ops->dotnorm2               = VecDotNorm2_SeqKokkos;
1162:   v->ops->placearray             = VecPlaceArray_SeqKokkos;
1163:   v->ops->replacearray           = VecReplaceArray_SeqKokkos;
1164:   v->ops->resetarray             = VecResetArray_SeqKokkos;
1165:   v->ops->destroy                = VecDestroy_SeqKokkos;
1166:   v->ops->duplicate              = VecDuplicate_SeqKokkos;
1167:   v->ops->conjugate              = VecConjugate_SeqKokkos;
1168:   v->ops->getlocalvector         = VecGetLocalVector_SeqKokkos;
1169:   v->ops->restorelocalvector     = VecRestoreLocalVector_SeqKokkos;
1170:   v->ops->getlocalvectorread     = VecGetLocalVector_SeqKokkos;
1171:   v->ops->restorelocalvectorread = VecRestoreLocalVector_SeqKokkos;
1172:   v->ops->getarraywrite          = VecGetArrayWrite_SeqKokkos;
1173:   v->ops->getarray               = VecGetArray_SeqKokkos;
1174:   v->ops->restorearray           = VecRestoreArray_SeqKokkos;

1176:   v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
1177:   v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
1178:   v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
1179:   v->ops->getsubvector            = VecGetSubVector_SeqKokkos;
1180:   v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

1182:   v->ops->setpreallocationcoo = VecSetPreallocationCOO_SeqKokkos;
1183:   v->ops->setvaluescoo        = VecSetValuesCOO_SeqKokkos;
1184:   PetscFunctionReturn(PETSC_SUCCESS);
1185: }

1187: /*MC
1188:    VECSEQKOKKOS - VECSEQKOKKOS = "seqkokkos" - The basic sequential vector, modified to use Kokkos

1190:    Options Database Keys:
1191: . -vec_type seqkokkos - sets the vector type to VECSEQKOKKOS during a call to VecSetFromOptions()

1193:   Level: beginner

1195: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
1196: M*/
1197: PetscErrorCode VecCreate_SeqKokkos(Vec v)
1198: {
1199:   Vec_Seq    *vecseq;
1200:   Vec_Kokkos *veckok;

1202:   PetscFunctionBegin;
1203:   PetscCall(PetscKokkosInitializeCheck());
1204:   PetscCall(PetscLayoutSetUp(v->map));
1205:   PetscCall(VecCreate_Seq(v)); /* Build a sequential vector, allocate array */
1206:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECSEQKOKKOS));
1207:   PetscCall(VecSetOps_SeqKokkos(v));

1209:   PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
1210:   vecseq         = static_cast<Vec_Seq *>(v->data);
1211:   veckok         = new Vec_Kokkos(v->map->n, vecseq->array, NULL); /* Let host claim it has the latest data (zero) */
1212:   v->spptr       = static_cast<void *>(veckok);
1213:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
1214:   PetscFunctionReturn(PETSC_SUCCESS);
1215: }

1217: /*@C
1218:    VecCreateSeqKokkosWithArray - Creates a Kokkos sequential array-style vector,
1219:    where the user provides the array space to store the vector values. The array
1220:    provided must be a device array.

1222:    Collective

1224:    Input Parameters:
1225: +  comm - the communicator, should be PETSC_COMM_SELF
1226: .  bs - the block size
1227: .  n - the vector length
1228: -  array - device memory where the vector elements are to be stored.

1230:    Output Parameter:
1231: .  v - the vector

1233:    Notes:
1234:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1235:    same type as an existing vector.

1237:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1238:    The user should not free the array until the vector is destroyed.

1240:    Level: intermediate

1242: .seealso: `VecCreateMPICUDAWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
1243:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCreateSeqWithArray()`,
1244:           `VecCreateMPIWithArray()`
1245: @*/
1246: PetscErrorCode VecCreateSeqKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar darray[], Vec *v)
1247: {
1248:   PetscMPIInt  size;
1249:   Vec          w;
1250:   Vec_Kokkos  *veckok = NULL;
1251:   PetscScalar *harray;

1253:   PetscFunctionBegin;
1254:   PetscCallMPI(MPI_Comm_size(comm, &size));
1255:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQKOKKOS on more than one process");

1257:   PetscCall(PetscKokkosInitializeCheck());
1258:   PetscCall(VecCreate(comm, &w));
1259:   PetscCall(VecSetSizes(w, n, n));
1260:   PetscCall(VecSetBlockSize(w, bs));
1261:   if (!darray) { /* Allocate memory ourself if user provided NULL */
1262:     PetscCall(VecSetType(w, VECSEQKOKKOS));
1263:   } else {
1264:     /* Build a VECSEQ, get its harray, and then build Vec_Kokkos along with darray */
1265:     if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
1266:       harray = const_cast<PetscScalar *>(darray);
1267:       PetscCall(VecCreate_Seq_Private(w, harray)); /* Build a sequential vector with harray */
1268:     } else {
1269:       PetscCall(VecSetType(w, VECSEQ));
1270:       harray = static_cast<Vec_Seq *>(w->data)->array;
1271:     }
1272:     PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECSEQKOKKOS)); /* Change it to Kokkos */
1273:     PetscCall(VecSetOps_SeqKokkos(w));
1274:     veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
1275:     veckok->v_dual.modify_device(); /* Mark the device is modified */
1276:     w->offloadmask = PETSC_OFFLOAD_KOKKOS;
1277:     w->spptr       = static_cast<void *>(veckok);
1278:   }
1279:   *v = w;
1280:   PetscFunctionReturn(PETSC_SUCCESS);
1281: }

1283: /*
1284:    VecCreateSeqKokkosWithArrays_Private - Creates a Kokkos sequential array-style vector
1285:    with user-provided arrays on host and device.

1287:    Collective

1289:    Input Parameter:
1290: +  comm - the communicator, should be PETSC_COMM_SELF
1291: .  bs - the block size
1292: .  n - the vector length
1293: .  harray - host memory where the vector elements are to be stored.
1294: -  darray - device memory where the vector elements are to be stored.

1296:    Output Parameter:
1297: .  v - the vector

1299:    Notes:
1300:    Unlike VecCreate{Seq,MPI}CUDAWithArrays(), this routine is private since we do not expect users to use it directly.

1302:    If there is no device, then harray and darray must be the same.
1303:    If n is not zero, then harray and darray must be allocated.
1304:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
1305:    we suppose harray and darray have the same data.

1307:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1308:    Caller should not free the array until the vector is destroyed.
1309: */
1310: PetscErrorCode VecCreateSeqKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
1311: {
1312:   PetscMPIInt size;
1313:   Vec         w;

1315:   PetscFunctionBegin;
1316:   PetscCall(PetscKokkosInitializeCheck());
1317:   PetscCallMPI(MPI_Comm_size(comm, &size));
1318:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQKOKKOS on more than one process");
1319:   if (n) {
1321:     PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
1322:   }
1323:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");

1325:   PetscCall(VecCreateSeqWithArray(comm, bs, n, harray, &w));
1326:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECSEQKOKKOS)); /* Change it to Kokkos */
1327:   PetscCall(VecSetOps_SeqKokkos(w));
1328:   PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
1329:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
1330:   *v             = w;
1331:   PetscFunctionReturn(PETSC_SUCCESS);
1332: }

1334: /* TODO: ftn-auto generates veckok.kokkosf.c */
1335: /*@C
1336:  VecCreateSeqKokkos - Creates a standard, sequential array-style vector.

1338:  Collective

1340:  Input Parameter:
1341:  +  comm - the communicator, should be PETSC_COMM_SELF
1342:  -  n - the vector length

1344:  Output Parameter:
1345:  .  v - the vector

1347:  Notes:
1348:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1349:  same type as an existing vector.

1351:  Level: intermediate

1353: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
1354:  @*/
1355: PetscErrorCode VecCreateSeqKokkos(MPI_Comm comm, PetscInt n, Vec *v)
1356: {
1357:   PetscFunctionBegin;
1358:   PetscCall(PetscKokkosInitializeCheck());
1359:   PetscCall(VecCreate(comm, v));
1360:   PetscCall(VecSetSizes(*v, n, n));
1361:   PetscCall(VecSetType(*v, VECSEQKOKKOS)); /* Calls VecCreate_SeqKokkos */
1362:   PetscFunctionReturn(PETSC_SUCCESS);
1363: }

1365: /* Duplicate layout etc but not the values in the input vector */
1366: PetscErrorCode VecDuplicate_SeqKokkos(Vec win, Vec *v)
1367: {
1368:   PetscFunctionBegin;
1369:   PetscCall(VecDuplicate_Seq(win, v)); /* It also dups ops of win */
1370:   PetscFunctionReturn(PETSC_SUCCESS);
1371: }

1373: PetscErrorCode VecDestroy_SeqKokkos(Vec v)
1374: {
1375:   Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);
1376:   Vec_Seq    *vecseq = static_cast<Vec_Seq *>(v->data);

1378:   PetscFunctionBegin;
1379:   delete veckok;
1380:   v->spptr = NULL;
1381:   if (vecseq) PetscCall(VecDestroy_Seq(v));
1382:   PetscFunctionReturn(PETSC_SUCCESS);
1383: }