Actual source code: mpikok.kokkos.cxx

  1: /*
  2:    This file contains routines for parallel vector operations.
  3:  */
  4: #include <petsc_kokkos.hpp>
  5: #include <petscvec_kokkos.hpp>
  6: #include <petsc/private/deviceimpl.h>
  7: #include <petsc/private/vecimpl.h>
  8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
 10: #include <petscsf.h>

 12: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
 13: {
 14:   PetscFunctionBegin;
 15:   delete static_cast<Vec_Kokkos *>(v->spptr);
 16:   PetscCall(VecDestroy_MPI(v));
 17:   PetscFunctionReturn(PETSC_SUCCESS);
 18: }

 20: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
 21: {
 22:   PetscFunctionBegin;
 23:   PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
 24:   PetscFunctionReturn(PETSC_SUCCESS);
 25: }

 27: static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
 28: {
 29:   PetscFunctionBegin;
 30:   PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: /* z = y^H x */
 35: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 36: {
 37:   PetscFunctionBegin;
 38:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
 39:   PetscFunctionReturn(PETSC_SUCCESS);
 40: }

 42: /* z = y^T x */
 43: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 44: {
 45:   PetscFunctionBegin;
 46:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 51: {
 52:   PetscFunctionBegin;
 53:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

 57: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 58: {
 59:   PetscFunctionBegin;
 60:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
 61:   PetscFunctionReturn(PETSC_SUCCESS);
 62: }

 64: static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 65: {
 66:   PetscFunctionBegin;
 67:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 72: {
 73:   PetscFunctionBegin;
 74:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 79: {
 80:   const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

 82:   PetscFunctionBegin;
 83:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 88: {
 89:   const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

 91:   PetscFunctionBegin;
 92:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }

 96: static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
 97: {
 98:   Vec         v;
 99:   Vec_MPI    *vecmpi;
100:   Vec_Kokkos *veckok;

102:   PetscFunctionBegin;
103:   /* Reuse VecDuplicate_MPI, which does the bulk of the setup */
104:   PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI with its data zeroed */
105:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
106:   v->ops[0] = win->ops[0];

108:   /* Build the Vec_Kokkos struct */
109:   vecmpi = static_cast<Vec_MPI *>(v->data);
110:   veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
111:   Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
112:   v->spptr       = veckok;
113:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
114:   *vv            = v;
115:   PetscFunctionReturn(PETSC_SUCCESS);
116: }

118: static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
119: {
120:   PetscFunctionBegin;
121:   PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
122:   PetscFunctionReturn(PETSC_SUCCESS);
123: }

125: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
126: {
127:   PetscFunctionBegin;
128:   PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
129:   PetscFunctionReturn(PETSC_SUCCESS);
130: }

132: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
133: {
134:   const auto vecmpi = static_cast<Vec_MPI *>(x->data);
135:   const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
136:   PetscInt   m;

138:   PetscFunctionBegin;
139:   PetscCall(VecGetLocalSize(x, &m));
140:   PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
141:   PetscCall(veckok->SetUpCOO(vecmpi, m));
142:   PetscFunctionReturn(PETSC_SUCCESS);
143: }

145: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
146: {
147:   const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
148:   const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
149:   const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
150:   const PetscCountKokkosView &perm1   = veckok->perm1_d;
151:   const PetscCountKokkosView &imap2   = veckok->imap2_d;
152:   const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
153:   const PetscCountKokkosView &perm2   = veckok->perm2_d;
154:   const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
155:   PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
156:   PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
157:   PetscScalarKokkosView       xv;
158:   ConstPetscScalarKokkosView  vv;
159:   PetscMemType                memtype;
160:   PetscInt                    m;

162:   PetscFunctionBegin;
163:   PetscCall(VecGetLocalSize(x, &m));
164:   PetscCall(PetscGetMemType(v, &memtype));
165:   if (PetscMemTypeHost(memtype)) { /* If the user provided v[] in host memory, it may need to be copied to the device */
166:     vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
167:   } else {
168:     vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
169:   }

171:   /* Pack entries to be sent to remote processes */
172:   Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
173:   PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

175:   if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
176:   else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */

178:   Kokkos::parallel_for(
179:     Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
180:       PetscScalar sum = 0.0;
181:       for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
182:       xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
183:     });

185:   PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

187:   /* Add received remote entries */
188:   Kokkos::parallel_for(
189:     Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
190:       for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
191:     });

193:   if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
194:   else PetscCall(VecRestoreKokkosView(x, &xv));
195:   PetscFunctionReturn(PETSC_SUCCESS);
196: }
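
The two COO routines above are reached through the public VecSetPreallocationCOO()/VecSetValuesCOO() interface. A minimal usage sketch (not part of this file; the sizes, indices, and values are made up for illustration):

#include <petscvec.h>

int main(int argc, char **argv)
{
  Vec         x;
  PetscInt    coo_i[] = {0, 2, 2, 5};         /* global indices; repeated (and off-process) entries are allowed */
  PetscScalar coo_v[] = {1.0, 2.0, 3.0, 4.0}; /* values may reside in host or device memory; the memtype is detected */

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, PETSC_DECIDE, 8));
  PetscCall(VecSetType(x, VECMPIKOKKOS));
  PetscCall(VecSetPreallocationCOO(x, 4, coo_i));   /* analyze the index pattern once */
  PetscCall(VecSetValuesCOO(x, coo_v, ADD_VALUES)); /* can be called repeatedly with new values */
  PetscCall(VecDestroy(&x));
  PetscCall(PetscFinalize());
  return 0;
}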

198: PetscErrorCode VecSetOps_MPIKokkos(Vec v)
199: {
200:   PetscFunctionBegin;
201:   v->ops->abs             = VecAbs_SeqKokkos;
202:   v->ops->reciprocal      = VecReciprocal_SeqKokkos;
203:   v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
204:   v->ops->setrandom       = VecSetRandom_SeqKokkos;
205:   v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
206:   v->ops->waxpy           = VecWAXPY_SeqKokkos;
207:   v->ops->norm            = VecNorm_MPIKokkos;
208:   v->ops->min             = VecMin_MPIKokkos;
209:   v->ops->max             = VecMax_MPIKokkos;
210:   v->ops->sum             = VecSum_SeqKokkos;
211:   v->ops->shift           = VecShift_SeqKokkos;
212:   v->ops->scale           = VecScale_SeqKokkos;
213:   v->ops->copy            = VecCopy_SeqKokkos;
214:   v->ops->set             = VecSet_SeqKokkos;
215:   v->ops->swap            = VecSwap_SeqKokkos;
216:   v->ops->axpy            = VecAXPY_SeqKokkos;
217:   v->ops->axpby           = VecAXPBY_SeqKokkos;
218:   v->ops->maxpy           = VecMAXPY_SeqKokkos;
219:   v->ops->aypx            = VecAYPX_SeqKokkos;
220:   v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
221:   v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
222:   v->ops->placearray      = VecPlaceArray_SeqKokkos;
223:   v->ops->replacearray    = VecReplaceArray_SeqKokkos;
224:   v->ops->resetarray      = VecResetArray_SeqKokkos;

226:   v->ops->dot   = VecDot_MPIKokkos;
227:   v->ops->tdot  = VecTDot_MPIKokkos;
228:   v->ops->mdot  = VecMDot_MPIKokkos;
229:   v->ops->mtdot = VecMTDot_MPIKokkos;

231:   v->ops->dot_local   = VecDot_SeqKokkos;
232:   v->ops->tdot_local  = VecTDot_SeqKokkos;
233:   v->ops->mdot_local  = VecMDot_SeqKokkos;
234:   v->ops->mtdot_local = VecMTDot_SeqKokkos;

236:   v->ops->norm_local              = VecNorm_SeqKokkos;
237:   v->ops->duplicate               = VecDuplicate_MPIKokkos;
238:   v->ops->destroy                 = VecDestroy_MPIKokkos;
239:   v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
240:   v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
241:   v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
242:   v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
243:   v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
244:   v->ops->getarray                = VecGetArray_SeqKokkos;
245:   v->ops->restorearray            = VecRestoreArray_SeqKokkos;
246:   v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
247:   v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
248:   v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
249:   v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
250:   v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

252:   v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
253:   v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;

255:   v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
260: {
261:   Vec_MPI *vecmpi;

263:   PetscFunctionBegin;
264:   PetscCall(PetscKokkosInitializeCheck());
265:   PetscCall(PetscLayoutSetUp(v->map));
266:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
267:   PetscCall(VecSetOps_MPIKokkos(v));
268:   PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
269:   vecmpi = static_cast<Vec_MPI *>(v->data);
270:   PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
271:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: // Duplicate m vectors of a VECMPIKOKKOS, backing them with one contiguous allocation so GEMV can be used
276: static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
277: {
278:   PetscInt64   lda; // use 64-bit as we will do "m * lda"
279:   PetscScalar *array_h, *array_d;
280:   PetscLayout  map;
281:   Vec_MPI     *wmpi = (Vec_MPI *)w->data;
282:   PetscBool    mdot_use_gemv, maxpy_use_gemv;

284:   PetscFunctionBegin;
285:   PetscCall(PetscKokkosInitializeCheck()); // as we'll call kokkos_malloc()
286:   if (wmpi->nghost) {                      // currently we only do the GEMV optimization for vectors without ghosts
287:     w->ops->duplicatevecs = VecDuplicateVecs_Default;
288:     PetscCall(VecDuplicateVecs(w, m, V));
289:   } else {
290:     PetscCall(PetscMalloc1(m, V));
291:     PetscCall(VecGetLayout(w, &map));
292:     PetscCall(VecGetLocalSizeAligned(w, 64, &lda)); // get in lda the 64-byte aligned local size

294:     // allocate raw arrays on host and device for all m vectors
295:     PetscCall(PetscCalloc1(m * lda, &array_h));
296: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
297:     array_d = array_h;
298: #else
299:     PetscCallCXX(array_d = static_cast<PetscScalar *>(Kokkos::kokkos_malloc("VecDuplicateVecs", sizeof(PetscScalar) * (m * lda))));
300: #endif
301:     mdot_use_gemv  = (w->ops->mdot == VecMDot_MPIKokkos_GEMV) ? PETSC_TRUE : PETSC_FALSE;
302:     maxpy_use_gemv = (w->ops->maxpy == VecMAXPY_SeqKokkos_GEMV) ? PETSC_TRUE : PETSC_FALSE;
303:     // create the m vectors with raw arrays
304:     for (PetscInt i = 0; i < m; i++) {
305:       Vec v;
306:       PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
307:       PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as we only init'ed array_h
308:       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
309:       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
310:       if (mdot_use_gemv) { // inherit w's mdot/maxpy optimization setting
311:         v->ops->mdot        = VecMDot_MPIKokkos_GEMV;
312:         v->ops->mtdot       = VecMTDot_MPIKokkos_GEMV;
313:         v->ops->mdot_local  = VecMDot_SeqKokkos_GEMV;
314:         v->ops->mtdot_local = VecMTDot_SeqKokkos_GEMV;
315:       }
316:       if (maxpy_use_gemv) v->ops->maxpy = VecMAXPY_SeqKokkos_GEMV;
317:       v->ops->view          = w->ops->view;
318:       v->stash.donotstash   = w->stash.donotstash;
319:       v->stash.ignorenegidx = w->stash.ignorenegidx;
320:       v->stash.bs           = w->stash.bs;
321:       (*V)[i]               = v;
322:     }

324:     // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
325:     if (m) {
326:       Vec v = (*V)[0];

328:       static_cast<Vec_MPI *>(v->data)->array_allocated = array_h;
329: #if !defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
330:       static_cast<Vec_Kokkos *>(v->spptr)->raw_array_d_allocated = array_d;
331: #endif
332:       // disable replacearray on the first vector: freeing its array would also free the arrays of the others in the group.
333:       // replacearray on the other vectors is fine, as they do not own their arrays.
334:       if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
335:     }
336:   }
337:   PetscFunctionReturn(PETSC_SUCCESS);
338: }
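
From the user side, the grouped allocation above is reached through the ordinary VecDuplicateVecs()/VecDestroyVecs() pair. A sketch, assuming w is an existing ghost-free VECMPIKOKKOS and PETSc has been initialized:

Vec        *Y;
PetscScalar dots[4];

PetscCall(VecDuplicateVecs(w, 4, &Y)); /* the duplicates may share one contiguous host/device allocation, as above */
for (PetscInt i = 0; i < 4; i++) PetscCall(VecSet(Y[i], (PetscScalar)(i + 1)));
PetscCall(VecMDot(w, 4, Y, dots));     /* eligible for the GEMV-based path when it is enabled */
PetscCall(VecDestroyVecs(4, &Y));      /* the first duplicate owns and frees the shared arrays */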

340: /*MC
341:    VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

343:    Options Database Keys:
344: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

346:   Level: beginner

348: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
349: M*/
350: PetscErrorCode VecCreate_MPIKokkos(Vec v)
351: {
352:   Vec_MPI    *vecmpi;
353:   Vec_Kokkos *veckok;
354:   PetscBool   mdot_use_gemv  = PETSC_TRUE;
355:   PetscBool   maxpy_use_gemv = PETSC_FALSE; // default is false, as we saw bad performance with vendor GEMV implementations on tall skinny matrices.

357:   PetscFunctionBegin;
358:   PetscCall(PetscKokkosInitializeCheck());
359:   PetscCall(PetscLayoutSetUp(v->map));
360:   PetscCall(VecCreate_MPI(v)); /* Calloc host array */

362:   vecmpi = static_cast<Vec_MPI *>(v->data);
363:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
364:   PetscCall(VecSetOps_MPIKokkos(v));
365:   veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
366:   v->spptr       = static_cast<void *>(veckok);
367:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
368:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
369:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));

371:   // allocate multiple vectors together
372:   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;

374:   if (mdot_use_gemv) {
375:     v->ops[0].mdot        = VecMDot_MPIKokkos_GEMV;
376:     v->ops[0].mtdot       = VecMTDot_MPIKokkos_GEMV;
377:     v->ops[0].mdot_local  = VecMDot_SeqKokkos_GEMV;
378:     v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
379:   }

381:   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
382:   PetscFunctionReturn(PETSC_SUCCESS);
383: }
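
A minimal sketch of selecting this type explicitly, assuming PetscInitialize() has already been called (VecSetFromOptions() with -vec_type mpikokkos is the options-database route):

Vec v;

PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
PetscCall(VecSetType(v, VECMPIKOKKOS)); /* -vec_mdot_use_gemv and -vec_maxpy_use_gemv are consulted at this point */
PetscCall(VecSet(v, 1.0));
PetscCall(VecDestroy(&v));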

385: // Create a VECMPIKOKKOS with layout and arrays
386: PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
387: {
388:   Vec w;

390:   PetscFunctionBegin;
391:   if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
392: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
393:   PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
394: #endif
395:   PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
396:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change the type name to VECMPIKOKKOS
397:   PetscCall(VecSetOps_MPIKokkos(w));
398:   PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
399:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
400:   *v             = w;
401:   PetscFunctionReturn(PETSC_SUCCESS);
402: }

404: /*@C
405:   VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
406:   where the user provides the GPU array space to store the vector values.

408:   Collective

410:   Input Parameters:
411: + comm   - the MPI communicator to use
412: . bs     - block size, same meaning as VecSetBlockSize()
413: . n      - local vector length, cannot be PETSC_DECIDE
414: . N      - global vector length (or PETSC_DECIDE to have it calculated)
415: - darray - the user provided GPU array to store the vector values

417:   Output Parameter:
418: . v - the vector

420:   Notes:
421:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
422:   same type as an existing vector.

424:   If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
425:   at a later stage to SET the array for storing the vector values.

427:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
428:   The user should not free the array until the vector is destroyed.

430:   Level: intermediate

432: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
433:           `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
434:           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`

436: @*/
437: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
438: {
439:   Vec          w;
440:   Vec_Kokkos  *veckok;
441:   Vec_MPI     *vecmpi;
442:   PetscScalar *harray;

444:   PetscFunctionBegin;
445:   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
446:   PetscCall(PetscKokkosInitializeCheck());
447:   PetscCall(PetscSplitOwnership(comm, &n, &N));
448:   PetscCall(VecCreate(comm, &w));
449:   PetscCall(VecSetSizes(w, n, N));
450:   PetscCall(VecSetBlockSize(w, bs));
451:   PetscCall(PetscLayoutSetUp(w->map));

453:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
454:     harray = const_cast<PetscScalar *>(darray);
455:   } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */

457:   PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build a parallel vector with the provided host array */
458:   vecmpi = static_cast<Vec_MPI *>(w->data);

460:   if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */

462:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
463:   PetscCall(VecSetOps_MPIKokkos(w));
464:   veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
465:   veckok->v_dual.modify_device(); /* Mark the device data as modified */
466:   w->spptr       = static_cast<void *>(veckok);
467:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
468:   *v             = w;
469:   PetscFunctionReturn(PETSC_SUCCESS);
470: }
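
A sketch of wrapping a caller-owned device array, assuming PETSc and Kokkos are initialized and n holds the local length; allocating with Kokkos::kokkos_malloc() is only one way to obtain such an array:

PetscScalar *darray;
Vec          v;

PetscCallCXX(darray = static_cast<PetscScalar *>(Kokkos::kokkos_malloc("user darray", sizeof(PetscScalar) * n)));
PetscCall(VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, darray, &v));
PetscCall(VecSet(v, 0.0)); /* darray is taken as the current device data, so initialize it before reading */
/* ... use v ... */
PetscCall(VecDestroy(&v));   /* does not free darray */
Kokkos::kokkos_free(darray); /* the caller frees its array after the vector is destroyed */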

472: /*
473:    VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
474:    with user-provided arrays on host and device.

476:    Collective

478:    Input Parameters:
479: +  comm - the communicator
480: .  bs - the block size
481: .  n - the local vector length
482: .  N - the global vector length
483: .  harray - host memory where the vector elements are to be stored
484: -  darray - device memory where the vector elements are to be stored

486:    Output Parameter:
487: .  v - the vector

489:    Notes:
490:    If there is no device, then harray and darray must be the same.
491:    If n is not zero, then harray and darray must be allocated.
492:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
493:    we suppose harray and darray have the same data.

495:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
496:    The user should not free the array until the vector is destroyed.
497: */
498: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
499: {
500:   Vec w;

502:   PetscFunctionBegin;
503:   PetscCall(PetscKokkosInitializeCheck());
504:   if (n) {
505:     PetscAssertPointer(harray, 5);
506:     PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
507:   }
508:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
509:   PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
510:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change the type name to VECMPIKOKKOS */
511:   PetscCall(VecSetOps_MPIKokkos(w));
512:   PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
513:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
514:   *v             = w;
515:   PetscFunctionReturn(PETSC_SUCCESS);
516: }

518: /*MC
519:    VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

521:    Options Database Keys:
522: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

524:   Level: beginner

526: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
527: M*/
528: PetscErrorCode VecCreate_Kokkos(Vec v)
529: {
530:   PetscMPIInt size;

532:   PetscFunctionBegin;
533:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
534:   if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
535:   else PetscCall(VecSetType(v, VECMPIKOKKOS));
536:   PetscFunctionReturn(PETSC_SUCCESS);
537: }
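
A sketch of picking the type from the options database, assuming PetscInitialize() has been called; running with -vec_type kokkos routes through VecCreate_Kokkos() above, which chooses VECSEQKOKKOS or VECMPIKOKKOS from the communicator size:

Vec v;

PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
PetscCall(VecSetFromOptions(v)); /* e.g. mpiexec -n 2 ./app -vec_type kokkos */
PetscCall(VecDestroy(&v));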