Actual source code: mpikok.kokkos.cxx

  1: /*
  2:    This file contains routines for Parallel vector operations.
  3:  */
  4: #include <petsc_kokkos.hpp>
  5: #include <petscvec_kokkos.hpp>
  6: #include <petsc/private/deviceimpl.h>
  7: #include <petsc/private/vecimpl.h>
  8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
 10: #include <petscsf.h>

 12: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
 13: {
 14:   PetscFunctionBegin;
 15:   delete static_cast<Vec_Kokkos *>(v->spptr);
 16:   PetscCall(VecDestroy_MPI(v));
 17:   PetscFunctionReturn(PETSC_SUCCESS);
 18: }

 20: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
 21: {
 22:   PetscFunctionBegin;
 23:   PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
 24:   PetscFunctionReturn(PETSC_SUCCESS);
 25: }

 27: static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
 28: {
 29:   PetscFunctionBegin;
 30:   PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: /* z = y^H x */
 35: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 36: {
 37:   PetscFunctionBegin;
 38:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
 39:   PetscFunctionReturn(PETSC_SUCCESS);
 40: }

 42: /* z = y^T x */
 43: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 44: {
 45:   PetscFunctionBegin;
 46:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 51: {
 52:   PetscFunctionBegin;
 53:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

 57: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 58: {
 59:   PetscFunctionBegin;
 60:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
 61:   PetscFunctionReturn(PETSC_SUCCESS);
 62: }

 64: static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 65: {
 66:   PetscFunctionBegin;
 67:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 72: {
 73:   PetscFunctionBegin;
 74:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 79: {
 80:   const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

 82:   PetscFunctionBegin;
 83:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 88: {
 89:   const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

 91:   PetscFunctionBegin;
 92:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }

 96: static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
 97: {
 98:   Vec         v;
 99:   Vec_MPI    *vecmpi;
100:   Vec_Kokkos *veckok;

102:   PetscFunctionBegin;
103:   /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
104:   PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI, with data zero'ed */
105:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
106:   v->ops[0] = win->ops[0];

108:   /* Build the Vec_Kokkos struct */
109:   vecmpi = static_cast<Vec_MPI *>(v->data);
110:   veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
111:   Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
112:   v->spptr       = veckok;
113:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
114:   *vv            = v;
115:   PetscFunctionReturn(PETSC_SUCCESS);
116: }

118: static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
119: {
120:   PetscFunctionBegin;
121:   PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
122:   PetscFunctionReturn(PETSC_SUCCESS);
123: }

125: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
126: {
127:   PetscFunctionBegin;
128:   PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
129:   PetscFunctionReturn(PETSC_SUCCESS);
130: }

132: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
133: {
134:   const auto vecmpi = static_cast<Vec_MPI *>(x->data);
135:   const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
136:   PetscInt   m;

138:   PetscFunctionBegin;
139:   PetscCall(VecGetLocalSize(x, &m));
140:   PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
141:   PetscCall(veckok->SetUpCOO(vecmpi, m));
142:   PetscFunctionReturn(PETSC_SUCCESS);
143: }

145: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
146: {
147:   const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
148:   const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
149:   const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
150:   const PetscCountKokkosView &perm1   = veckok->perm1_d;
151:   const PetscCountKokkosView &imap2   = veckok->imap2_d;
152:   const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
153:   const PetscCountKokkosView &perm2   = veckok->perm2_d;
154:   const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
155:   PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
156:   PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
157:   PetscScalarKokkosView       xv;
158:   ConstPetscScalarKokkosView  vv;
159:   PetscMemType                memtype;
160:   PetscInt                    m;

162:   PetscFunctionBegin;
163:   PetscCall(VecGetLocalSize(x, &m));
164:   PetscCall(PetscGetMemType(v, &memtype));
165:   if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we might need to copy it to device if any */
166:     vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
167:   } else {
168:     vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
169:   }

171:   /* Pack entries to be sent to remote */
172:   Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
173:   PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

175:   if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
176:   else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */

178:   Kokkos::parallel_for(
179:     Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
180:       PetscScalar sum = 0.0;
181:       for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
182:       xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
183:     });

185:   PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

187:   /* Add received remote entries */
188:   Kokkos::parallel_for(
189:     Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
190:       for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
191:     });

193:   if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
194:   else PetscCall(VecRestoreKokkosView(x, &xv));
195:   PetscFunctionReturn(PETSC_SUCCESS);
196: }

198: PetscErrorCode VecSetOps_MPIKokkos(Vec v)
199: {
200:   PetscFunctionBegin;
201:   v->ops->abs             = VecAbs_SeqKokkos;
202:   v->ops->reciprocal      = VecReciprocal_SeqKokkos;
203:   v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
204:   v->ops->setrandom       = VecSetRandom_SeqKokkos;
205:   v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
206:   v->ops->waxpy           = VecWAXPY_SeqKokkos;
207:   v->ops->norm            = VecNorm_MPIKokkos;
208:   v->ops->min             = VecMin_MPIKokkos;
209:   v->ops->max             = VecMax_MPIKokkos;
210:   v->ops->sum             = VecSum_SeqKokkos;
211:   v->ops->shift           = VecShift_SeqKokkos;
212:   v->ops->scale           = VecScale_SeqKokkos;
213:   v->ops->copy            = VecCopy_SeqKokkos;
214:   v->ops->set             = VecSet_SeqKokkos;
215:   v->ops->swap            = VecSwap_SeqKokkos;
216:   v->ops->axpy            = VecAXPY_SeqKokkos;
217:   v->ops->axpby           = VecAXPBY_SeqKokkos;
218:   v->ops->maxpy           = VecMAXPY_SeqKokkos;
219:   v->ops->aypx            = VecAYPX_SeqKokkos;
220:   v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
221:   v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
222:   v->ops->placearray      = VecPlaceArray_SeqKokkos;
223:   v->ops->replacearray    = VecReplaceArray_SeqKokkos;
224:   v->ops->resetarray      = VecResetArray_SeqKokkos;

226:   v->ops->dot   = VecDot_MPIKokkos;
227:   v->ops->tdot  = VecTDot_MPIKokkos;
228:   v->ops->mdot  = VecMDot_MPIKokkos;
229:   v->ops->mtdot = VecMTDot_MPIKokkos;

231:   v->ops->dot_local   = VecDot_SeqKokkos;
232:   v->ops->tdot_local  = VecTDot_SeqKokkos;
233:   v->ops->mdot_local  = VecMDot_SeqKokkos;
234:   v->ops->mtdot_local = VecMTDot_SeqKokkos;

236:   v->ops->norm_local              = VecNorm_SeqKokkos;
237:   v->ops->duplicate               = VecDuplicate_MPIKokkos;
238:   v->ops->destroy                 = VecDestroy_MPIKokkos;
239:   v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
240:   v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
241:   v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
242:   v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
243:   v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
244:   v->ops->getarray                = VecGetArray_SeqKokkos;
245:   v->ops->restorearray            = VecRestoreArray_SeqKokkos;
246:   v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
247:   v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
248:   v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
249:   v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
250:   v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

252:   v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
253:   v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;

255:   v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
260: {
261:   Vec_MPI *vecmpi;

263:   PetscFunctionBegin;
264:   PetscCall(PetscKokkosInitializeCheck());
265:   PetscCall(PetscLayoutSetUp(v->map));
266:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
267:   PetscCall(VecSetOps_MPIKokkos(v));
268:   PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
269:   vecmpi = static_cast<Vec_MPI *>(v->data);
270:   PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
271:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: // Duplicate a VECMPIKOKKOS
276: static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
277: {
278:   PetscInt64   lda; // use 64-bit as we will do "m * lda"
279:   PetscScalar *array_h, *array_d;
280:   PetscLayout  map;
281:   Vec_MPI     *wmpi = (Vec_MPI *)w->data;

283:   PetscFunctionBegin;
284:   PetscCall(PetscKokkosInitializeCheck()); // as we'll call kokkos_malloc()
285:   if (wmpi->nghost) {                      // currently only do GEMV optimiation for vectors without ghosts
286:     w->ops->duplicatevecs = VecDuplicateVecs_Default;
287:     PetscCall(VecDuplicateVecs(w, m, V));
288:   } else {
289:     PetscCall(PetscMalloc1(m, V));
290:     PetscCall(VecGetLayout(w, &map));
291:     lda = map->n;
292:     lda = ((lda + 31) / 32) * 32; // make every vector 32-elements aligned

294:     // allocate raw arrays on host and device for the whole m vectors
295:     PetscCall(PetscCalloc1(m * lda, &array_h));
296: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
297:     array_d = array_h;
298: #else
299:     PetscCallCXX(array_d = static_cast<PetscScalar *>(Kokkos::kokkos_malloc("VecDuplicateVecs", sizeof(PetscScalar) * (m * lda))));
300: #endif

302:     // create the m vectors with raw arrays
303:     for (PetscInt i = 0; i < m; i++) {
304:       Vec v;
305:       PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
306:       PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as we only init'ed array_h
307:       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
308:       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
309:       v->ops->view          = w->ops->view;
310:       v->stash.donotstash   = w->stash.donotstash;
311:       v->stash.ignorenegidx = w->stash.ignorenegidx;
312:       v->stash.bs           = w->stash.bs;
313:       (*V)[i]               = v;
314:     }

316:     // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
317:     if (m) {
318:       Vec v = (*V)[0];

320:       static_cast<Vec_MPI *>(v->data)->array_allocated = array_h;
321: #if !defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
322:       static_cast<Vec_Kokkos *>(v->spptr)->raw_array_d_allocated = array_d;
323: #endif
324:       // disable replacearray of the first vector, as freeing its memory also frees others in the group.
325:       // But replacearray of others is ok, as they don't own their array.
326:       if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
327:     }
328:   }
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }

332: /*MC
333:    VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

335:    Options Database Keys:
336: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

338:   Level: beginner

340: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
341: M*/
342: PetscErrorCode VecCreate_MPIKokkos(Vec v)
343: {
344:   Vec_MPI    *vecmpi;
345:   Vec_Kokkos *veckok;
346:   PetscBool   mdot_use_gemv  = PETSC_TRUE;
347:   PetscBool   maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.

349:   PetscFunctionBegin;
350:   PetscCall(PetscKokkosInitializeCheck());
351:   PetscCall(PetscLayoutSetUp(v->map));
352:   PetscCall(VecCreate_MPI(v)); /* Calloc host array */

354:   vecmpi = static_cast<Vec_MPI *>(v->data);
355:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
356:   PetscCall(VecSetOps_MPIKokkos(v));
357:   veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
358:   v->spptr       = static_cast<void *>(veckok);
359:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
360:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
361:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));

363:   // allocate multiple vectors together
364:   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;

366:   if (mdot_use_gemv) {
367:     v->ops[0].mdot        = VecMDot_MPIKokkos_GEMV;
368:     v->ops[0].mtdot       = VecMTDot_MPIKokkos_GEMV;
369:     v->ops[0].mdot_local  = VecMDot_SeqKokkos_GEMV;
370:     v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
371:   }

373:   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
374:   PetscFunctionReturn(PETSC_SUCCESS);
375: }

377: // Create a VECMPIKOKKOS with layout and arrays
378: PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
379: {
380:   Vec w;

382:   PetscFunctionBegin;
383:   if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
384: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
385:   PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
386: #endif
387:   PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
388:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change it to VECKOKKOS
389:   PetscCall(VecSetOps_MPIKokkos(w));
390:   PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
391:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
392:   *v             = w;
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }

396: /*@C
397:   VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
398:   where the user provides the GPU array space to store the vector values.

400:   Collective

402:   Input Parameters:
403: + comm   - the MPI communicator to use
404: . bs     - block size, same meaning as VecSetBlockSize()
405: . n      - local vector length, cannot be PETSC_DECIDE
406: . N      - global vector length (or PETSC_DECIDE to have calculated)
407: - darray - the user provided GPU array to store the vector values

409:   Output Parameter:
410: . v - the vector

412:   Notes:
413:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
414:   same type as an existing vector.

416:   If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
417:   at a later stage to SET the array for storing the vector values.

419:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
420:   The user should not free the array until the vector is destroyed.

422:   Level: intermediate

424: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
425:           `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
426:           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`

428: @*/
429: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
430: {
431:   Vec          w;
432:   Vec_Kokkos  *veckok;
433:   Vec_MPI     *vecmpi;
434:   PetscScalar *harray;

436:   PetscFunctionBegin;
437:   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
438:   PetscCall(PetscKokkosInitializeCheck());
439:   PetscCall(PetscSplitOwnership(comm, &n, &N));
440:   PetscCall(VecCreate(comm, &w));
441:   PetscCall(VecSetSizes(w, n, N));
442:   PetscCall(VecSetBlockSize(w, bs));
443:   PetscCall(PetscLayoutSetUp(w->map));

445:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
446:     harray = const_cast<PetscScalar *>(darray);
447:   } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */

449:   PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build a sequential vector with provided data */
450:   vecmpi = static_cast<Vec_MPI *>(w->data);

452:   if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */

454:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
455:   PetscCall(VecSetOps_MPIKokkos(w));
456:   veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
457:   veckok->v_dual.modify_device(); /* Mark the device is modified */
458:   w->spptr       = static_cast<void *>(veckok);
459:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
460:   *v             = w;
461:   PetscFunctionReturn(PETSC_SUCCESS);
462: }

464: /*
465:    VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
466:    with user-provided arrays on host and device.

468:    Collective

470:    Input Parameter:
471: +  comm - the communicator
472: .  bs - the block size
473: .  n - the local vector length
474: .  N - the global vector length
475: -  harray - host memory where the vector elements are to be stored.
476: -  darray - device memory where the vector elements are to be stored.

478:    Output Parameter:
479: .  v - the vector

481:    Notes:
482:    If there is no device, then harray and darray must be the same.
483:    If n is not zero, then harray and darray must be allocated.
484:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
485:    we suppose harray and darray have the same data.

487:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
488:    The user should not free the array until the vector is destroyed.
489: */
490: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
491: {
492:   Vec w;

494:   PetscFunctionBegin;
495:   PetscCall(PetscKokkosInitializeCheck());
496:   if (n) {
497:     PetscAssertPointer(harray, 5);
498:     PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
499:   }
500:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
501:   PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
502:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change it to Kokkos */
503:   PetscCall(VecSetOps_MPIKokkos(w));
504:   PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
505:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
506:   *v             = w;
507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }

510: /*MC
511:    VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

513:    Options Database Keys:
514: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

516:   Level: beginner

518: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
519: M*/
520: PetscErrorCode VecCreate_Kokkos(Vec v)
521: {
522:   PetscMPIInt size;

524:   PetscFunctionBegin;
525:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
526:   if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
527:   else PetscCall(VecSetType(v, VECMPIKOKKOS));
528:   PetscFunctionReturn(PETSC_SUCCESS);
529: }