Actual source code: mpikok.kokkos.cxx
1: /*
2: This file contains routines for Parallel vector operations.
3: */
4: #include <petsc_kokkos.hpp>
5: #include <petscvec_kokkos.hpp>
6: #include <petsc/private/deviceimpl.h>
7: #include <petsc/private/vecimpl.h>
8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
10: #include <petscsf.h>
12: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
13: {
14: PetscFunctionBegin;
15: delete static_cast<Vec_Kokkos *>(v->spptr);
16: PetscCall(VecDestroy_MPI(v));
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
20: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
21: {
22: PetscFunctionBegin;
23: PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
24: PetscFunctionReturn(PETSC_SUCCESS);
25: }
27: static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
28: {
29: PetscFunctionBegin;
30: PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
31: PetscFunctionReturn(PETSC_SUCCESS);
32: }
34: /* z = y^H x */
35: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
36: {
37: PetscFunctionBegin;
38: PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
39: PetscFunctionReturn(PETSC_SUCCESS);
40: }
42: /* z = y^T x */
43: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
44: {
45: PetscFunctionBegin;
46: PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
47: PetscFunctionReturn(PETSC_SUCCESS);
48: }
50: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
51: {
52: PetscFunctionBegin;
53: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
54: PetscFunctionReturn(PETSC_SUCCESS);
55: }
57: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
58: {
59: PetscFunctionBegin;
60: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
65: {
66: PetscFunctionBegin;
67: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
68: PetscFunctionReturn(PETSC_SUCCESS);
69: }
71: static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
72: {
73: PetscFunctionBegin;
74: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
79: {
80: const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};
82: PetscFunctionBegin;
83: PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
84: PetscFunctionReturn(PETSC_SUCCESS);
85: }
87: static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
88: {
89: const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};
91: PetscFunctionBegin;
92: PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
97: {
98: Vec v;
99: Vec_MPI *vecmpi;
100: Vec_Kokkos *veckok;
102: PetscFunctionBegin;
103: /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
104: PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI, with data zero'ed */
105: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
106: v->ops[0] = win->ops[0];
108: /* Build the Vec_Kokkos struct */
109: vecmpi = static_cast<Vec_MPI *>(v->data);
110: veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
111: Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
112: v->spptr = veckok;
113: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
114: *vv = v;
115: PetscFunctionReturn(PETSC_SUCCESS);
116: }
118: static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
119: {
120: PetscFunctionBegin;
121: PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
122: PetscFunctionReturn(PETSC_SUCCESS);
123: }
125: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
126: {
127: PetscFunctionBegin;
128: PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
129: PetscFunctionReturn(PETSC_SUCCESS);
130: }
132: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
133: {
134: const auto vecmpi = static_cast<Vec_MPI *>(x->data);
135: const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
136: PetscInt m;
138: PetscFunctionBegin;
139: PetscCall(VecGetLocalSize(x, &m));
140: PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
141: PetscCall(veckok->SetUpCOO(vecmpi, m));
142: PetscFunctionReturn(PETSC_SUCCESS);
143: }
145: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
146: {
147: const auto vecmpi = static_cast<Vec_MPI *>(x->data);
148: const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
149: const PetscCountKokkosView &jmap1 = veckok->jmap1_d;
150: const PetscCountKokkosView &perm1 = veckok->perm1_d;
151: const PetscCountKokkosView &imap2 = veckok->imap2_d;
152: const PetscCountKokkosView &jmap2 = veckok->jmap2_d;
153: const PetscCountKokkosView &perm2 = veckok->perm2_d;
154: const PetscCountKokkosView &Cperm = veckok->Cperm_d;
155: PetscScalarKokkosView &sendbuf = veckok->sendbuf_d;
156: PetscScalarKokkosView &recvbuf = veckok->recvbuf_d;
157: PetscScalarKokkosView xv;
158: ConstPetscScalarKokkosView vv;
159: PetscMemType memtype;
160: PetscInt m;
162: PetscFunctionBegin;
163: PetscCall(VecGetLocalSize(x, &m));
164: PetscCall(PetscGetMemType(v, &memtype));
165: if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we might need to copy it to device if any */
166: vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
167: } else {
168: vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
169: }
171: /* Pack entries to be sent to remote */
172: Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
173: PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));
175: if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
176: else PetscCall(VecGetKokkosView(x, &xv)); /* read & write vector */
178: Kokkos::parallel_for(
179: Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
180: PetscScalar sum = 0.0;
181: for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
182: xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
183: });
185: PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));
187: /* Add received remote entries */
188: Kokkos::parallel_for(
189: Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
190: for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
191: });
193: if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
194: else PetscCall(VecRestoreKokkosView(x, &xv));
195: PetscFunctionReturn(PETSC_SUCCESS);
196: }
198: PetscErrorCode VecSetOps_MPIKokkos(Vec v)
199: {
200: PetscFunctionBegin;
201: v->ops->abs = VecAbs_SeqKokkos;
202: v->ops->reciprocal = VecReciprocal_SeqKokkos;
203: v->ops->pointwisemult = VecPointwiseMult_SeqKokkos;
204: v->ops->setrandom = VecSetRandom_SeqKokkos;
205: v->ops->dotnorm2 = VecDotNorm2_MPIKokkos;
206: v->ops->waxpy = VecWAXPY_SeqKokkos;
207: v->ops->norm = VecNorm_MPIKokkos;
208: v->ops->min = VecMin_MPIKokkos;
209: v->ops->max = VecMax_MPIKokkos;
210: v->ops->sum = VecSum_SeqKokkos;
211: v->ops->shift = VecShift_SeqKokkos;
212: v->ops->scale = VecScale_SeqKokkos;
213: v->ops->copy = VecCopy_SeqKokkos;
214: v->ops->set = VecSet_SeqKokkos;
215: v->ops->swap = VecSwap_SeqKokkos;
216: v->ops->axpy = VecAXPY_SeqKokkos;
217: v->ops->axpby = VecAXPBY_SeqKokkos;
218: v->ops->maxpy = VecMAXPY_SeqKokkos;
219: v->ops->aypx = VecAYPX_SeqKokkos;
220: v->ops->axpbypcz = VecAXPBYPCZ_SeqKokkos;
221: v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
222: v->ops->placearray = VecPlaceArray_SeqKokkos;
223: v->ops->replacearray = VecReplaceArray_SeqKokkos;
224: v->ops->resetarray = VecResetArray_SeqKokkos;
226: v->ops->dot = VecDot_MPIKokkos;
227: v->ops->tdot = VecTDot_MPIKokkos;
228: v->ops->mdot = VecMDot_MPIKokkos;
229: v->ops->mtdot = VecMTDot_MPIKokkos;
231: v->ops->dot_local = VecDot_SeqKokkos;
232: v->ops->tdot_local = VecTDot_SeqKokkos;
233: v->ops->mdot_local = VecMDot_SeqKokkos;
234: v->ops->mtdot_local = VecMTDot_SeqKokkos;
236: v->ops->norm_local = VecNorm_SeqKokkos;
237: v->ops->duplicate = VecDuplicate_MPIKokkos;
238: v->ops->destroy = VecDestroy_MPIKokkos;
239: v->ops->getlocalvector = VecGetLocalVector_SeqKokkos;
240: v->ops->restorelocalvector = VecRestoreLocalVector_SeqKokkos;
241: v->ops->getlocalvectorread = VecGetLocalVector_SeqKokkos;
242: v->ops->restorelocalvectorread = VecRestoreLocalVector_SeqKokkos;
243: v->ops->getarraywrite = VecGetArrayWrite_SeqKokkos;
244: v->ops->getarray = VecGetArray_SeqKokkos;
245: v->ops->restorearray = VecRestoreArray_SeqKokkos;
246: v->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqKokkos;
247: v->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqKokkos;
248: v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
249: v->ops->getsubvector = VecGetSubVector_MPIKokkos;
250: v->ops->restoresubvector = VecRestoreSubVector_SeqKokkos;
252: v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
253: v->ops->setvaluescoo = VecSetValuesCOO_MPIKokkos;
255: v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
256: PetscFunctionReturn(PETSC_SUCCESS);
257: }
259: PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
260: {
261: Vec_MPI *vecmpi;
263: PetscFunctionBegin;
264: PetscCall(PetscKokkosInitializeCheck());
265: PetscCall(PetscLayoutSetUp(v->map));
266: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
267: PetscCall(VecSetOps_MPIKokkos(v));
268: PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
269: vecmpi = static_cast<Vec_MPI *>(v->data);
270: PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
271: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: // Duplicate a VECMPIKOKKOS
276: static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
277: {
278: PetscInt64 lda; // use 64-bit as we will do "m * lda"
279: PetscScalar *array_h, *array_d;
280: PetscLayout map;
281: Vec_MPI *wmpi = (Vec_MPI *)w->data;
282: PetscBool mdot_use_gemv, maxpy_use_gemv;
284: PetscFunctionBegin;
285: PetscCall(PetscKokkosInitializeCheck()); // as we'll call kokkos_malloc()
286: if (wmpi->nghost) { // currently only do GEMV optimiation for vectors without ghosts
287: w->ops->duplicatevecs = VecDuplicateVecs_Default;
288: PetscCall(VecDuplicateVecs(w, m, V));
289: } else {
290: PetscCall(PetscMalloc1(m, V));
291: PetscCall(VecGetLayout(w, &map));
292: VecGetLocalSizeAligned(w, 64, &lda); // get in lda the 64-bytes aligned local size
294: // allocate raw arrays on host and device for the whole m vectors
295: PetscCall(PetscCalloc1(m * lda, &array_h));
296: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
297: array_d = array_h;
298: #else
299: PetscCallCXX(array_d = static_cast<PetscScalar *>(Kokkos::kokkos_malloc("VecDuplicateVecs", sizeof(PetscScalar) * (m * lda))));
300: #endif
301: mdot_use_gemv = (w->ops->mdot == VecMDot_MPIKokkos_GEMV) ? PETSC_TRUE : PETSC_FALSE;
302: maxpy_use_gemv = (w->ops->maxpy == VecMAXPY_SeqKokkos_GEMV) ? PETSC_TRUE : PETSC_FALSE;
303: // create the m vectors with raw arrays
304: for (PetscInt i = 0; i < m; i++) {
305: Vec v;
306: PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
307: PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as we only init'ed array_h
308: PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
309: PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
310: if (mdot_use_gemv) { // inherit w's mdot/maxpy optimization setting
311: v->ops->mdot = VecMDot_MPIKokkos_GEMV;
312: v->ops->mtdot = VecMTDot_MPIKokkos_GEMV;
313: v->ops->mdot_local = VecMDot_SeqKokkos_GEMV;
314: v->ops->mtdot_local = VecMTDot_SeqKokkos_GEMV;
315: }
316: if (maxpy_use_gemv) v->ops->maxpy = VecMAXPY_SeqKokkos_GEMV;
317: v->ops->view = w->ops->view;
318: v->stash.donotstash = w->stash.donotstash;
319: v->stash.ignorenegidx = w->stash.ignorenegidx;
320: v->stash.bs = w->stash.bs;
321: (*V)[i] = v;
322: }
324: // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
325: if (m) {
326: Vec v = (*V)[0];
328: static_cast<Vec_MPI *>(v->data)->array_allocated = array_h;
329: #if !defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
330: static_cast<Vec_Kokkos *>(v->spptr)->raw_array_d_allocated = array_d;
331: #endif
332: // disable replacearray of the first vector, as freeing its memory also frees others in the group.
333: // But replacearray of others is ok, as they don't own their array.
334: if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
335: }
336: }
337: PetscFunctionReturn(PETSC_SUCCESS);
338: }
340: /*MC
341: VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos
343: Options Database Keys:
344: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()
346: Level: beginner
348: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
349: M*/
350: PetscErrorCode VecCreate_MPIKokkos(Vec v)
351: {
352: Vec_MPI *vecmpi;
353: Vec_Kokkos *veckok;
354: PetscBool mdot_use_gemv = PETSC_TRUE;
355: PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.
357: PetscFunctionBegin;
358: PetscCall(PetscKokkosInitializeCheck());
359: PetscCall(PetscLayoutSetUp(v->map));
360: PetscCall(VecCreate_MPI(v)); /* Calloc host array */
362: vecmpi = static_cast<Vec_MPI *>(v->data);
363: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
364: PetscCall(VecSetOps_MPIKokkos(v));
365: veckok = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
366: v->spptr = static_cast<void *>(veckok);
367: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
368: PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
369: PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
371: // allocate multiple vectors together
372: if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;
374: if (mdot_use_gemv) {
375: v->ops[0].mdot = VecMDot_MPIKokkos_GEMV;
376: v->ops[0].mtdot = VecMTDot_MPIKokkos_GEMV;
377: v->ops[0].mdot_local = VecMDot_SeqKokkos_GEMV;
378: v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
379: }
381: if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: // Create a VECMPIKOKKOS with layout and arrays
386: PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
387: {
388: Vec w;
390: PetscFunctionBegin;
391: if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
392: #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST)
393: PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
394: #endif
395: PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
396: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change it to VECKOKKOS
397: PetscCall(VecSetOps_MPIKokkos(w));
398: PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
399: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
400: *v = w;
401: PetscFunctionReturn(PETSC_SUCCESS);
402: }
404: /*@C
405: VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
406: where the user provides the GPU array space to store the vector values.
408: Collective
410: Input Parameters:
411: + comm - the MPI communicator to use
412: . bs - block size, same meaning as VecSetBlockSize()
413: . n - local vector length, cannot be PETSC_DECIDE
414: . N - global vector length (or PETSC_DECIDE to have calculated)
415: - darray - the user provided GPU array to store the vector values
417: Output Parameter:
418: . v - the vector
420: Notes:
421: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
422: same type as an existing vector.
424: If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
425: at a later stage to SET the array for storing the vector values.
427: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
428: The user should not free the array until the vector is destroyed.
430: Level: intermediate
432: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
433: `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
434: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
436: @*/
437: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
438: {
439: Vec w;
440: Vec_Kokkos *veckok;
441: Vec_MPI *vecmpi;
442: PetscScalar *harray;
444: PetscFunctionBegin;
445: PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
446: PetscCall(PetscKokkosInitializeCheck());
447: PetscCall(PetscSplitOwnership(comm, &n, &N));
448: PetscCall(VecCreate(comm, &w));
449: PetscCall(VecSetSizes(w, n, N));
450: PetscCall(VecSetBlockSize(w, bs));
451: PetscCall(PetscLayoutSetUp(w->map));
453: if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
454: harray = const_cast<PetscScalar *>(darray);
455: } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */
457: PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build a sequential vector with provided data */
458: vecmpi = static_cast<Vec_MPI *>(w->data);
460: if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */
462: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
463: PetscCall(VecSetOps_MPIKokkos(w));
464: veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
465: veckok->v_dual.modify_device(); /* Mark the device is modified */
466: w->spptr = static_cast<void *>(veckok);
467: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
468: *v = w;
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
472: /*
473: VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
474: with user-provided arrays on host and device.
476: Collective
478: Input Parameter:
479: + comm - the communicator
480: . bs - the block size
481: . n - the local vector length
482: . N - the global vector length
483: - harray - host memory where the vector elements are to be stored.
484: - darray - device memory where the vector elements are to be stored.
486: Output Parameter:
487: . v - the vector
489: Notes:
490: If there is no device, then harray and darray must be the same.
491: If n is not zero, then harray and darray must be allocated.
492: After the call, the created vector is supposed to be in a synchronized state, i.e.,
493: we suppose harray and darray have the same data.
495: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
496: The user should not free the array until the vector is destroyed.
497: */
498: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
499: {
500: Vec w;
502: PetscFunctionBegin;
503: PetscCall(PetscKokkosInitializeCheck());
504: if (n) {
505: PetscAssertPointer(harray, 5);
506: PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
507: }
508: if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
509: PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
510: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change it to Kokkos */
511: PetscCall(VecSetOps_MPIKokkos(w));
512: PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
513: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
514: *v = w;
515: PetscFunctionReturn(PETSC_SUCCESS);
516: }
518: /*MC
519: VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos
521: Options Database Keys:
522: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()
524: Level: beginner
526: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
527: M*/
528: PetscErrorCode VecCreate_Kokkos(Vec v)
529: {
530: PetscMPIInt size;
532: PetscFunctionBegin;
533: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
534: if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
535: else PetscCall(VecSetType(v, VECMPIKOKKOS));
536: PetscFunctionReturn(PETSC_SUCCESS);
537: }