Actual source code: mpikok.kokkos.cxx

/*
   This file contains routines for parallel vector operations.
 */
#include <petsc_kokkos.hpp>
#include <petscvec_kokkos.hpp>
#include <petsc/private/deviceimpl.h>
#include <petsc/private/vecimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
#include <petscsf.h>

static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
{
  PetscFunctionBegin;
  delete static_cast<Vec_Kokkos *>(v->spptr);
  v->spptr = NULL;
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
{
  PetscFunctionBegin;
  PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
{
  PetscFunctionBegin;
  PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^H x */
static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^T x */
static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecCreate_MPIKokkos_Common(Vec); // forward declaration

static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
{
  Vec                       v;
  Vec_Kokkos               *veckok;
  Vec_MPI                  *wdata = (Vec_MPI *)win->data, *vdata;
  PetscScalarKokkosDualView w_dual;

  PetscFunctionBegin;
  PetscCallCXX(w_dual = PetscScalarKokkosDualView("w_dual", win->map->n + wdata->nghost)); // Kokkos initializes w_dual to zero

  /* Reuse VecDuplicateWithArray_MPI(), which does the bulk of the setup */
  PetscCall(VecDuplicateWithArray_MPI(win, w_dual.view_host().data(), &v)); /* after the call, v is a VECMPI */
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  // In case win is a ghost vector, we also need to convert its localrep to VECKOKKOS. We provide the device array, so the allocation in w_dual is not wasted
  vdata = static_cast<Vec_MPI *>(v->data);
  if (vdata->localrep) PetscCall(VecConvert_Seq_SeqKokkos_inplace(vdata->localrep, w_dual.view_device().data()));
  PetscCall(VecCreate_MPIKokkos_Common(v));
  v->ops[0] = win->ops[0]; // always follow ops[] in win

  /* Build the Vec_Kokkos struct */
  veckok         = new Vec_Kokkos(v->map->n, w_dual.view_host().data(), w_dual.view_device().data());
  veckok->w_dual = w_dual;
  v->spptr       = veckok;
  *vv            = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscFunctionBegin;
  PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
{
  PetscFunctionBegin;
  PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
{
  const auto vecmpi = static_cast<Vec_MPI *>(x->data);
  const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
  PetscInt   m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
  PetscCall(veckok->SetUpCOO(vecmpi, m));
  PetscFunctionReturn(PETSC_SUCCESS);
}

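/*
  Set vector values in COO fashion, matching the coo_i[] indices given to VecSetPreallocationCOO().
  Locally owned entries are summed directly: for local entry i, jmap1/perm1 select the input values
  that land there. Entries owned by other ranks are packed into sendbuf (via the Cperm permutation),
  exchanged with a PetscSF, and the received values are then added through imap2/jmap2/perm2.
*/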
static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
{
  const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
  const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
  const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
  const PetscCountKokkosView &perm1   = veckok->perm1_d;
  const PetscCountKokkosView &imap2   = veckok->imap2_d;
  const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
  const PetscCountKokkosView &perm2   = veckok->perm2_d;
  const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
  PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
  PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
  PetscScalarKokkosView       xv;
  ConstPetscScalarKokkosView  vv;
  PetscMemType                memtype;
  PetscInt                    m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(PetscGetMemType(v, &memtype));
  if (PetscMemTypeHost(memtype)) { /* The user gave v[] on the host, so copy it to the device if there is one */
    vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), PetscScalarKokkosViewHost(const_cast<PetscScalar *>(v), vecmpi->coo_n));
  } else {
    vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
  }

  /* Pack entries to be sent to remote ranks */
  Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
  PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

  if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write-only access to the vector */
  else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write access to the vector */

  Kokkos::parallel_for(
    Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
      PetscScalar sum = 0.0;
      for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
      xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
    });

  PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

  /* Add received remote entries */
  Kokkos::parallel_for(
    Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
      for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
    });

  if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
  else PetscCall(VecRestoreKokkosView(x, &xv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Shared by all VecCreate/Duplicate routines for VecMPIKokkos
static PetscErrorCode VecCreate_MPIKokkos_Common(Vec v)
{
  PetscFunctionBegin;
  v->boundtocpu           = PetscDefined(HAVE_KOKKOS_WITHOUT_GPU) ? PETSC_TRUE : PETSC_FALSE; // VECKOKKOS does not yet support CPU binding; without a GPU backend, we deem the vector bound to the CPU
  v->ops->bindtocpu       = VecBindToCPU_SeqKokkos;
  v->ops->abs             = VecAbs_SeqKokkos;
  v->ops->reciprocal      = VecReciprocal_SeqKokkos;
  v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
  v->ops->setrandom       = VecSetRandom_SeqKokkos;
  v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
  v->ops->waxpy           = VecWAXPY_SeqKokkos;
  v->ops->norm            = VecNorm_MPIKokkos;
  v->ops->min             = VecMin_MPIKokkos;
  v->ops->max             = VecMax_MPIKokkos;
  v->ops->sum             = VecSum_SeqKokkos;
  v->ops->shift           = VecShift_SeqKokkos;
  v->ops->scale           = VecScale_SeqKokkos;
  v->ops->copy            = VecCopy_SeqKokkos;
  v->ops->set             = VecSet_SeqKokkos;
  v->ops->swap            = VecSwap_SeqKokkos;
  v->ops->axpy            = VecAXPY_SeqKokkos;
  v->ops->axpby           = VecAXPBY_SeqKokkos;
  v->ops->maxpy           = VecMAXPY_SeqKokkos;
  v->ops->aypx            = VecAYPX_SeqKokkos;
  v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
  v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
  v->ops->placearray      = VecPlaceArray_SeqKokkos;
  v->ops->replacearray    = VecReplaceArray_SeqKokkos;
  v->ops->resetarray      = VecResetArray_SeqKokkos;

  v->ops->dot   = VecDot_MPIKokkos;
  v->ops->tdot  = VecTDot_MPIKokkos;
  v->ops->mdot  = VecMDot_MPIKokkos;
  v->ops->mtdot = VecMTDot_MPIKokkos;

  v->ops->dot_local   = VecDot_SeqKokkos;
  v->ops->tdot_local  = VecTDot_SeqKokkos;
  v->ops->mdot_local  = VecMDot_SeqKokkos;
  v->ops->mtdot_local = VecMTDot_SeqKokkos;

  v->ops->norm_local              = VecNorm_SeqKokkos;
  v->ops->duplicate               = VecDuplicate_MPIKokkos;
  v->ops->destroy                 = VecDestroy_MPIKokkos;
  v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
  v->ops->getlocalvectorread      = VecGetLocalVectorRead_SeqKokkos;
  v->ops->restorelocalvectorread  = VecRestoreLocalVectorRead_SeqKokkos;
  v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
  v->ops->getarray                = VecGetArray_SeqKokkos;
  v->ops->restorearray            = VecRestoreArray_SeqKokkos;
  v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
  v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
  v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
  v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
  v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

  v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
  v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;

  v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;

  v->offloadmask = PETSC_OFFLOAD_KOKKOS; // Mark this as a VECKOKKOS; we use this flag as a cheap test for VECKOKKOS
  PetscFunctionReturn(PETSC_SUCCESS);
}

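// Convert a VECMPI to VECMPIKOKKOS in place: the existing host array is kept as the host side of the
// Vec_Kokkos struct, and for ghost vectors we reuse the device array of the (already converted) localrep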
PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
{
  Vec_MPI *vecmpi;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscLayoutSetUp(v->map));
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(VecCreate_MPIKokkos_Common(v));
  PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
  vecmpi = static_cast<Vec_MPI *>(v->data);
  if (vecmpi->localrep) { // It is a ghost vector
    Vec         local = vecmpi->localrep;
    Vec_Kokkos *veckok;

    PetscCall(VecConvert_Seq_SeqKokkos_inplace(local, NULL));
    veckok = static_cast<Vec_Kokkos *>(local->spptr);
    // TODO: can we subview on veckok->v_dual?
    PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, veckok->v_dual.view_host().data(), veckok->v_dual.view_device().data()));
  } else PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Duplicate m vectors of a VECMPIKOKKOS at once, backing them with one contiguous allocation so that
// multi-vector operations can use GEMV
static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
{
  PetscInt64                lda; // use 64-bit as we will do "m * lda"
  PetscScalar              *array_h, *array_d;
  PetscLayout               map;
  Vec_MPI                  *wmpi = (Vec_MPI *)w->data;
  PetscScalarKokkosDualView w_dual;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck()); // as we'll call kokkos_malloc()
  if (wmpi->nghost) {                      // currently we only do the GEMV optimization for vectors without ghosts
    w->ops->duplicatevecs = VecDuplicateVecs_Default;
    PetscCall(VecDuplicateVecs(w, m, V));
  } else {
    PetscCall(PetscMalloc1(m, V));
    PetscCall(VecGetLayout(w, &map));
    PetscCall(VecGetLocalSizeAligned(w, 64, &lda)); // get in lda the 64-byte aligned local size

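    // Memory layout: vector i occupies [i*lda, i*lda + n) in one buffer of m*lda scalars, so the group
    // can be viewed as a dense matrix with leading dimension lda for GEMV-based VecMDot/VecMAXPY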
    // See comments in VecCreate_SeqKokkos() on why we use DualView to allocate the memory
    PetscCallCXX(w_dual = PetscScalarKokkosDualView("VecDuplicateVecs", m * lda)); // Kokkos initializes w_dual to zero

    // create the m vectors with raw arrays
    array_h = w_dual.view_host().data();
    array_d = w_dual.view_device().data();
    for (PetscInt i = 0; i < m; i++) {
      Vec v;
      PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
      PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as we only initialized array_h
      PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
      PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
      v->ops[0]             = w->ops[0];
      v->stash.donotstash   = w->stash.donotstash;
      v->stash.ignorenegidx = w->stash.ignorenegidx;
      v->stash.bs           = w->stash.bs;
      v->bstash.bs          = w->bstash.bs;
      (*V)[i]               = v;
    }

    // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
    if (m) {
      Vec v = (*V)[0];

      static_cast<Vec_Kokkos *>(v->spptr)->w_dual = w_dual; // stash the memory
      // disable replacearray of the first vector, as freeing its memory also frees the others in the group;
      // replacearray of the others is ok, as they don't own their arrays
      if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

   Options Database Keys:
. -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

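  Example Usage:
  A minimal sketch (the sizes are illustrative, and PETSc must be configured with Kokkos):
.vb
  Vec x;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
  PetscCall(VecSetType(x, VECMPIKOKKOS)); // or -vec_type mpikokkos with VecSetFromOptions()
  PetscCall(VecSet(x, 1.0));
  PetscCall(VecDestroy(&x));
.ve
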
  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_MPIKokkos(Vec v)
{
  PetscBool                 mdot_use_gemv  = PETSC_TRUE;
  PetscBool                 maxpy_use_gemv = PETSC_FALSE; // default is false, as we have seen bad performance of vendor GEMV on tall skinny matrices
  PetscScalarKokkosDualView v_dual;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscLayoutSetUp(v->map));

  PetscCallCXX(v_dual = PetscScalarKokkosDualView("v_dual", v->map->n)); // Kokkos initializes v_dual to zero
  PetscCall(VecCreate_MPI_Private(v, PETSC_FALSE, 0, v_dual.view_host().data()));

  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(VecCreate_MPIKokkos_Common(v));
  PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
  PetscCallCXX(v->spptr = new Vec_Kokkos(v_dual));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
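  // Both are runtime options, e.g. -vec_mdot_use_gemv 0 disables the GEMV path for VecMDot()/VecMTDot()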

  // allocate multiple vectors together
  if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;

  if (mdot_use_gemv) {
    v->ops[0].mdot        = VecMDot_MPIKokkos_GEMV;
    v->ops[0].mtdot       = VecMTDot_MPIKokkos_GEMV;
    v->ops[0].mdot_local  = VecMDot_SeqKokkos_GEMV;
    v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
  }

  if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Create a VECMPIKOKKOS with the given layout and user-provided host/device arrays
PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscFunctionBegin;
  if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
#if defined(KOKKOS_ENABLE_UNIFIED_MEMORY)
  PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
#endif
  PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change its type name to VECMPIKOKKOS
  PetscCall(VecCreate_MPIKokkos_Common(w));
  PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
  *v = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
  where the user provides the GPU array space to store the vector values.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. bs     - block size, same meaning as VecSetBlockSize()
. n      - local vector length, cannot be PETSC_DECIDE
. N      - global vector length (or PETSC_DECIDE to have calculated)
- darray - the user provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
  same type as an existing vector.

  If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
  at a later stage to SET the array for storing the vector values.

  PETSc does NOT free the array when the vector is destroyed via VecDestroy().
  The user should not free the array until the vector is destroyed.

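  Example Usage:
  A minimal sketch, assuming a Kokkos view supplies the device memory (the local size 10 is illustrative);
  the view must outlive the vector, since PETSc does not take ownership of the array:
.vb
  Kokkos::View<PetscScalar *> darray("darray", 10);
  Vec                         x;

  PetscCall(VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, 10, PETSC_DECIDE, darray.data(), &x));
  // ... use x ...
  PetscCall(VecDestroy(&x));
.ve
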
  Level: intermediate

.seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
{
  Vec          w;
  Vec_Kokkos  *veckok;
  Vec_MPI     *vecmpi;
  PetscScalar *harray;

  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, &w));
  PetscCall(VecSetSizes(w, n, N));
  PetscCall(VecSetBlockSize(w, bs));
  PetscCall(PetscLayoutSetUp(w->map));

  if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) {
    harray = const_cast<PetscScalar *>(darray);
  } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If the device memory space is not the host's, allocate the host array ourselves */

  PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build an MPI vector with the provided host array */
  vecmpi = static_cast<Vec_MPI *>(w->data);

  if (!std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */

  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
  PetscCall(VecCreate_MPIKokkos_Common(w));
  veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
  veckok->v_dual.modify_device(); /* Mark the device side as modified */
  w->spptr = static_cast<void *>(veckok);
  *v       = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
   with user-provided arrays on host and device.

   Collective

   Input Parameters:
+  comm - the communicator
.  bs - the block size
.  n - the local vector length
.  N - the global vector length
.  harray - host memory where the vector elements are to be stored
-  darray - device memory where the vector elements are to be stored

   Output Parameter:
.  v - the vector

   Notes:
   If there is no device, then harray and darray must be the same.
   If n is not zero, then harray and darray must be allocated.
   After the call, the created vector is assumed to be in a synchronized state, i.e.,
   harray and darray are assumed to contain the same data.

   PETSc does NOT free the arrays when the vector is destroyed via VecDestroy().
   The user should not free the arrays until the vector is destroyed.
*/
PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  if (n) {
    PetscAssertPointer(harray, 5);
    PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
  }
  if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
  PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change its type name to VECMPIKOKKOS */
  PetscCall(VecCreate_MPIKokkos_Common(w));
  PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
  *v = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

   Options Database Keys:
. -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

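  Example Usage:
  A minimal sketch; VECKOKKOS resolves to VECSEQKOKKOS or VECMPIKOKKOS based on the communicator size:
.vb
  Vec x;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
  PetscCall(VecSetFromOptions(x)); // pick the type at runtime with -vec_type kokkos
.ve
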
  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_Kokkos(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
  else PetscCall(VecSetType(v, VECMPIKOKKOS));
  PetscFunctionReturn(PETSC_SUCCESS);
}