Actual source code: mpikok.kokkos.cxx

/*
   This file contains routines for Parallel vector operations.
 */
#include <petsc_kokkos.hpp>
#include <petscvec_kokkos.hpp>
#include <petsc/private/deviceimpl.h>
#include <petsc/private/vecimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
#include <petscsf.h>

static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
{
  PetscFunctionBegin;
  delete static_cast<Vec_Kokkos *>(v->spptr);
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
{
  PetscFunctionBegin;
  PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
{
  PetscFunctionBegin;
  PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^H x */
static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^T x */
static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
{
  Vec         v;
  Vec_Kokkos *veckok;
  Vec_MPI    *wdata = (Vec_MPI *)win->data;

  PetscScalarKokkosDualView w_dual;

  PetscFunctionBegin;
  PetscCallCXX(w_dual = PetscScalarKokkosDualView("w_dual", win->map->n + wdata->nghost)); // Kokkos init's w_dual to zero

  /* Reuse VecDuplicateWithArray_MPI(), which handles most of the setup */
  PetscCall(VecDuplicateWithArray_MPI(win, w_dual.view_host().data(), &v)); /* after the call, v is a VECMPI */
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  v->ops[0] = win->ops[0];

  /* Build the Vec_Kokkos struct */
  veckok         = new Vec_Kokkos(v->map->n, w_dual.view_host().data(), w_dual.view_device().data());
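  // keep a reference to the dual view in Vec_Kokkos so the allocation stays alive for the lifetime of v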
  veckok->w_dual = w_dual;
  v->spptr       = veckok;
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *vv            = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscFunctionBegin;
  PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
{
  PetscFunctionBegin;
  PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

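/*
   The two routines below implement the split-phase COO assembly interface for VECMPIKOKKOS:
   VecSetPreallocationCOO() analyzes the (possibly repeated, possibly off-process) indices once,
   and VecSetValuesCOO() then assembles the values, running the local kernels on the device.
   A minimal usage sketch from the caller's side (indices and values are illustrative):

     PetscInt    coo_i[] = {0, 1, 1, 7}; // global indices; repeats and off-process entries are allowed
     PetscScalar v[]     = {1.0, 2.0, 3.0, 4.0};

     PetscCall(VecSetPreallocationCOO(x, 4, coo_i));
     PetscCall(VecSetValuesCOO(x, v, ADD_VALUES));
*/
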
static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
{
  const auto vecmpi = static_cast<Vec_MPI *>(x->data);
  const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
  PetscInt   m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
  PetscCall(veckok->SetUpCOO(vecmpi, m));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
{
  const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
  const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
  const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
  const PetscCountKokkosView &perm1   = veckok->perm1_d;
  const PetscCountKokkosView &imap2   = veckok->imap2_d;
  const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
  const PetscCountKokkosView &perm2   = veckok->perm2_d;
  const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
  PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
  PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
  PetscScalarKokkosView       xv;
  ConstPetscScalarKokkosView  vv;
  PetscMemType                memtype;
  PetscInt                    m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(PetscGetMemType(v, &memtype));
  if (PetscMemTypeHost(memtype)) { /* If v[] was provided on the host, we may need to copy it to the device */
    vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), PetscScalarKokkosViewHost(const_cast<PetscScalar *>(v), vecmpi->coo_n));
  } else {
    vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
  }

  /* Pack entries to be sent to remote processes */
  Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
  PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

  if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
  else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */

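  /* Add the locally owned entries while the communication of remote entries is still in flight */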
  Kokkos::parallel_for(
    Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
      PetscScalar sum = 0.0;
      for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
      xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
    });

  PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

  /* Add received remote entries */
  Kokkos::parallel_for(
    Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
      for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
    });

  if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
  else PetscCall(VecRestoreKokkosView(x, &xv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecSetOps_MPIKokkos(Vec v)
{
  PetscFunctionBegin;
  v->ops->abs             = VecAbs_SeqKokkos;
  v->ops->reciprocal      = VecReciprocal_SeqKokkos;
  v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
  v->ops->setrandom       = VecSetRandom_SeqKokkos;
  v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
  v->ops->waxpy           = VecWAXPY_SeqKokkos;
  v->ops->norm            = VecNorm_MPIKokkos;
  v->ops->min             = VecMin_MPIKokkos;
  v->ops->max             = VecMax_MPIKokkos;
  v->ops->sum             = VecSum_SeqKokkos;
  v->ops->shift           = VecShift_SeqKokkos;
  v->ops->scale           = VecScale_SeqKokkos;
  v->ops->copy            = VecCopy_SeqKokkos;
  v->ops->set             = VecSet_SeqKokkos;
  v->ops->swap            = VecSwap_SeqKokkos;
  v->ops->axpy            = VecAXPY_SeqKokkos;
  v->ops->axpby           = VecAXPBY_SeqKokkos;
  v->ops->maxpy           = VecMAXPY_SeqKokkos;
  v->ops->aypx            = VecAYPX_SeqKokkos;
  v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
  v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
  v->ops->placearray      = VecPlaceArray_SeqKokkos;
  v->ops->replacearray    = VecReplaceArray_SeqKokkos;
  v->ops->resetarray      = VecResetArray_SeqKokkos;

  v->ops->dot   = VecDot_MPIKokkos;
  v->ops->tdot  = VecTDot_MPIKokkos;
  v->ops->mdot  = VecMDot_MPIKokkos;
  v->ops->mtdot = VecMTDot_MPIKokkos;

  v->ops->dot_local   = VecDot_SeqKokkos;
  v->ops->tdot_local  = VecTDot_SeqKokkos;
  v->ops->mdot_local  = VecMDot_SeqKokkos;
  v->ops->mtdot_local = VecMTDot_SeqKokkos;

  v->ops->norm_local              = VecNorm_SeqKokkos;
  v->ops->duplicate               = VecDuplicate_MPIKokkos;
  v->ops->destroy                 = VecDestroy_MPIKokkos;
  v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
  v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
  v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
  v->ops->getarray                = VecGetArray_SeqKokkos;
  v->ops->restorearray            = VecRestoreArray_SeqKokkos;
  v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
  v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
  v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
  v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
  v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

  v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
  v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;

  v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
{
  Vec_MPI *vecmpi;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscLayoutSetUp(v->map));
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(VecSetOps_MPIKokkos(v));
  PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
  vecmpi = static_cast<Vec_MPI *>(v->data);
  PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Duplicate a VECMPIKOKKOS m times at once, backing the vectors with one contiguous allocation
// so that multi-vector operations (e.g., MDot, MAXPY) can later be done with a single GEMV
static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
{
  PetscInt64                lda; // use 64-bit as we will do "m * lda"
  PetscScalar              *array_h, *array_d;
  PetscLayout               map;
  Vec_MPI                  *wmpi = (Vec_MPI *)w->data;
  PetscScalarKokkosDualView w_dual;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck()); // as we'll call kokkos_malloc()
  if (wmpi->nghost) {                      // currently only do GEMV optimization for vectors without ghosts
    w->ops->duplicatevecs = VecDuplicateVecs_Default;
    PetscCall(VecDuplicateVecs(w, m, V));
  } else {
    PetscCall(PetscMalloc1(m, V));
    PetscCall(VecGetLayout(w, &map));
    PetscCall(VecGetLocalSizeAligned(w, 64, &lda)); // get in lda the local size aligned to 64 bytes

    // See comments in VecCreate_SeqKokkos() on why we use DualView to allocate the memory
    PetscCallCXX(w_dual = PetscScalarKokkosDualView("VecDuplicateVecs", m * lda)); // Kokkos init's w_dual to zero

    // create the m vectors with raw arrays
    array_h = w_dual.view_host().data();
    array_d = w_dual.view_device().data();
    for (PetscInt i = 0; i < m; i++) {
      Vec v;
      PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
      PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as we only init'ed array_h
      PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
      PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
      v->ops[0]             = w->ops[0];
      v->stash.donotstash   = w->stash.donotstash;
      v->stash.ignorenegidx = w->stash.ignorenegidx;
      v->stash.bs           = w->stash.bs;
      v->bstash.bs          = w->bstash.bs;
      (*V)[i]               = v;
    }

    // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
    if (m) {
      Vec v = (*V)[0];

      static_cast<Vec_Kokkos *>(v->spptr)->w_dual = w_dual; // stash the memory
      // disable replacearray of the first vector, as freeing its memory also frees others in the group.
      // But replacearray of others is ok, as they don't own their array.
      if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

   Options Database Keys:
. -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

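   Example Usage:
   A minimal sketch, assuming PetscInitialize() has been called and PETSc was configured with Kokkos:
.vb
   Vec v;

   PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
   PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
   PetscCall(VecSetFromOptions(v)); // becomes VECMPIKOKKOS when run with -vec_type mpikokkos
   PetscCall(VecDestroy(&v));
.ve
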
  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_MPIKokkos(Vec v)
{
  PetscBool                 mdot_use_gemv  = PETSC_TRUE;
  PetscBool                 maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.
  PetscScalarKokkosDualView v_dual;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscLayoutSetUp(v->map));

  PetscCallCXX(v_dual = PetscScalarKokkosDualView("v_dual", v->map->n)); // Kokkos init's v_dual to zero
  PetscCall(VecCreate_MPI_Private(v, PETSC_FALSE, 0, v_dual.view_host().data()));

  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(VecSetOps_MPIKokkos(v));
  PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
  PetscCallCXX(v->spptr = new Vec_Kokkos(v_dual));
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));

  // allocate multiple vectors together
  if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;

  if (mdot_use_gemv) {
    v->ops[0].mdot        = VecMDot_MPIKokkos_GEMV;
    v->ops[0].mtdot       = VecMTDot_MPIKokkos_GEMV;
    v->ops[0].mdot_local  = VecMDot_SeqKokkos_GEMV;
    v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
  }

  if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Create a VECMPIKOKKOS with layout and arrays
PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscFunctionBegin;
  if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
#if defined(KOKKOS_ENABLE_UNIFIED_MEMORY)
  PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
#endif
  PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change the type name to VECMPIKOKKOS
  PetscCall(VecSetOps_MPIKokkos(w));
  PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
  where the user provides the GPU array space to store the vector values.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. bs     - block size, same meaning as VecSetBlockSize()
. n      - local vector length, cannot be PETSC_DECIDE
. N      - global vector length (or PETSC_DECIDE to have it calculated)
- darray - the user-provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
  same type as an existing vector.

  If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
  at a later stage to SET the array for storing the vector values.

  PETSc does NOT free the array when the vector is destroyed via VecDestroy().
  The user should not free the array until the vector is destroyed.

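  Example Usage:
  A minimal sketch in which the device array is allocated through Kokkos (the View name and sizes are illustrative):
.vb
  PetscInt                    n = 10;               // local length on this MPI rank
  Kokkos::View<PetscScalar *> darray("darray", n);  // device memory in Kokkos' default memory space
  Vec                         v;

  PetscCall(VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, darray.data(), &v));
  // ... use v ...
  PetscCall(VecDestroy(&v)); // PETSc does not free darray; it must outlive v
.ve
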
  Level: intermediate

.seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`

@*/
PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
{
  Vec          w;
  Vec_Kokkos  *veckok;
  Vec_MPI     *vecmpi;
  PetscScalar *harray;

  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, &w));
  PetscCall(VecSetSizes(w, n, N));
  PetscCall(VecSetBlockSize(w, bs));
  PetscCall(PetscLayoutSetUp(w->map));

  if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) {
    harray = const_cast<PetscScalar *>(darray);
  } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If the device is not the same as the host, allocate the host array ourselves */

  PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build an MPI vector with the provided host array */
  vecmpi = static_cast<Vec_MPI *>(w->data);

  if (!std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */

  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
  PetscCall(VecSetOps_MPIKokkos(w));
  veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
  veckok->v_dual.modify_device(); /* Mark the device as modified */
  w->spptr       = static_cast<void *>(veckok);
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
   with user-provided arrays on host and device.

   Collective

   Input Parameters:
+  comm - the communicator
.  bs - the block size
.  n - the local vector length
.  N - the global vector length
.  harray - host memory where the vector elements are to be stored
-  darray - device memory where the vector elements are to be stored

   Output Parameter:
.  v - the vector

   Notes:
   If there is no device, then harray and darray must be the same.
   If n is not zero, then harray and darray must be allocated.
   After the call, the created vector is considered to be in a synchronized state, i.e.,
   harray and darray are assumed to hold the same data.

   PETSc does NOT free the arrays when the vector is destroyed via VecDestroy().
   The user should not free the arrays until the vector is destroyed.
*/
PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  if (n) {
    PetscAssertPointer(harray, 5);
    PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
  }
  if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
  PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change the type name to VECMPIKOKKOS */
  PetscCall(VecSetOps_MPIKokkos(w));
  PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

   Options Database Keys:
. -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

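   Example Usage:
   A minimal sketch; VecCreate_Kokkos() below picks the sequential or parallel implementation based on the communicator size:
.vb
   Vec v;

   PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
   PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
   PetscCall(VecSetType(v, VECKOKKOS)); // VECSEQKOKKOS on one MPI rank, VECMPIKOKKOS otherwise
   PetscCall(VecDestroy(&v));
.ve
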
  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_Kokkos(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
  else PetscCall(VecSetType(v, VECMPIKOKKOS));
  PetscFunctionReturn(PETSC_SUCCESS);
}