Actual source code: mpikok.kokkos.cxx
1: /*
2: This file contains routines for Parallel vector operations.
3: */
4: #include <petsc_kokkos.hpp>
5: #include <petscvec_kokkos.hpp>
6: #include <petsc/private/deviceimpl.h>
7: #include <petsc/private/vecimpl.h>
8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
10: #include <petscsf.h>
12: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
13: {
14: PetscFunctionBegin;
15: delete static_cast<Vec_Kokkos *>(v->spptr);
16: v->spptr = NULL;
17: PetscCall(VecDestroy_MPI(v));
18: PetscFunctionReturn(PETSC_SUCCESS);
19: }
21: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
22: {
23: PetscFunctionBegin;
24: PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
25: PetscFunctionReturn(PETSC_SUCCESS);
26: }
28: static PetscErrorCode VecErrorWeightedNorms_MPIKokkos(Vec U, Vec Y, Vec E, NormType wnormtype, PetscReal atol, Vec vatol, PetscReal rtol, Vec vrtol, PetscReal ignore_max, PetscReal *norm, PetscInt *norm_loc, PetscReal *norma, PetscInt *norma_loc, PetscReal *normr, PetscInt *normr_loc)
29: {
30: PetscFunctionBegin;
31: PetscCall(VecErrorWeightedNorms_MPI_Default(U, Y, E, wnormtype, atol, vatol, rtol, vrtol, ignore_max, norm, norm_loc, norma, norma_loc, normr, normr_loc, VecErrorWeightedNorms_SeqKokkos));
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
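/*
   The wrappers above (and the dot/min/max wrappers below) all follow one pattern: a *_MPI_Default
   helper is handed the corresponding SeqKokkos kernel to compute the local part on the device, and
   the helper combines the per-rank results over the vector's communicator. A minimal usage sketch
   (x is assumed to be an existing VECMPIKOKKOS vector):

     PetscReal nrm;

     PetscCall(VecSetRandom(x, NULL));
     PetscCall(VecNorm(x, NORM_2, &nrm)); // dispatches to VecNorm_MPIKokkos above
*/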
35: /* z = y^H x */
36: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
37: {
38: PetscFunctionBegin;
39: PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: /* z = y^T x */
44: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
45: {
46: PetscFunctionBegin;
47: PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
52: {
53: PetscFunctionBegin;
54: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
59: {
60: PetscFunctionBegin;
61: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
65: static PetscErrorCode VecMDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
66: {
67: PetscFunctionBegin;
68: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos_GEMV));
69: PetscFunctionReturn(PETSC_SUCCESS);
70: }
72: static PetscErrorCode VecMTDot_MPIKokkos_GEMV(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
73: {
74: PetscFunctionBegin;
75: PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos_GEMV));
76: PetscFunctionReturn(PETSC_SUCCESS);
77: }
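/*
   As their names suggest, the _GEMV variants above batch the nv local dot products into a single
   GEMV-style kernel instead of nv separate reductions; they are installed in VecCreate_MPIKokkos()
   below when -vec_mdot_use_gemv is true (the default). From user code the call is the usual
   VecMDot(); a minimal sketch (x and the array y[] are assumed to be existing VECMPIKOKKOS vectors
   with the same layout):

     PetscScalar z[3];

     PetscCall(VecMDot(x, 3, y, z)); // z[i] = y[i]^H x
*/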
79: static PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
80: {
81: const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};
83: PetscFunctionBegin;
84: PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
85: PetscFunctionReturn(PETSC_SUCCESS);
86: }
88: static PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
89: {
90: const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};
92: PetscFunctionBegin;
93: PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: static PetscErrorCode VecCreate_MPIKokkos_Common(Vec); // forward declaration
99: static PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
100: {
101: Vec v;
102: Vec_Kokkos *veckok;
103: Vec_MPI *wdata = (Vec_MPI *)win->data, *vdata;
104: PetscScalarKokkosDualView w_dual;
106: PetscFunctionBegin;
107: PetscCallCXX(w_dual = PetscScalarKokkosDualView("w_dual", win->map->n + wdata->nghost)); // Kokkos initializes w_dual to zero
109: /* Reuse VecDuplicateWithArray_MPI, which handles most of the MPI-layer setup */
110: PetscCall(VecDuplicateWithArray_MPI(win, w_dual.view_host().data(), &v)); /* after the call, v is a VECMPI */
111: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
112: // If win is a ghost vector, we also need to convert its localrep to VECSEQKOKKOS. We provide the device array, so the allocation in w_dual is not wasted
113: vdata = static_cast<Vec_MPI *>(v->data);
114: if (vdata->localrep) PetscCall(VecConvert_Seq_SeqKokkos_inplace(vdata->localrep, w_dual.view_device().data()));
115: PetscCall(VecCreate_MPIKokkos_Common(v));
116: v->ops[0] = win->ops[0]; // always follow ops[] in win
118: /* Build the Vec_Kokkos struct */
119: veckok = new Vec_Kokkos(v->map->n, w_dual.view_host().data(), w_dual.view_device().data());
120: veckok->w_dual = w_dual;
121: v->spptr = veckok;
122: *vv = v;
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
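/*
   Duplication preserves the Kokkos type and, for ghosted vectors, the converted localrep. A
   minimal usage sketch (x is assumed to be an existing VECMPIKOKKOS vector); note that
   VecDuplicate() does not copy values:

     Vec y;

     PetscCall(VecDuplicate(x, &y));
     PetscCall(VecCopy(x, y));
     PetscCall(VecDestroy(&y));
*/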
126: static PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
127: {
128: PetscFunctionBegin;
129: PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
133: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
134: {
135: PetscFunctionBegin;
136: PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
137: PetscFunctionReturn(PETSC_SUCCESS);
138: }
140: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
141: {
142: const auto vecmpi = static_cast<Vec_MPI *>(x->data);
143: const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
144: PetscInt m;
146: PetscFunctionBegin;
147: PetscCall(VecGetLocalSize(x, &m));
148: PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
149: PetscCall(veckok->SetUpCOO(vecmpi, m));
150: PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
154: {
155: const auto vecmpi = static_cast<Vec_MPI *>(x->data);
156: const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
157: const PetscCountKokkosView &jmap1 = veckok->jmap1_d;
158: const PetscCountKokkosView &perm1 = veckok->perm1_d;
159: const PetscCountKokkosView &imap2 = veckok->imap2_d;
160: const PetscCountKokkosView &jmap2 = veckok->jmap2_d;
161: const PetscCountKokkosView &perm2 = veckok->perm2_d;
162: const PetscCountKokkosView &Cperm = veckok->Cperm_d;
163: PetscScalarKokkosView &sendbuf = veckok->sendbuf_d;
164: PetscScalarKokkosView &recvbuf = veckok->recvbuf_d;
165: PetscScalarKokkosView xv;
166: ConstPetscScalarKokkosView vv;
167: PetscMemType memtype;
168: PetscInt m;
170: PetscFunctionBegin;
171: PetscCall(VecGetLocalSize(x, &m));
172: PetscCall(PetscGetMemType(v, &memtype));
173: if (PetscMemTypeHost(memtype)) { /* If the user provided v[] on the host, it may need to be copied to the device */
174: vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), PetscScalarKokkosViewHost(const_cast<PetscScalar *>(v), vecmpi->coo_n));
175: } else {
176: vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
177: }
179: /* Pack entries to be sent to remote ranks */
180: Kokkos::parallel_for(Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->sendlen), KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
181: PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));
183: if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
184: else PetscCall(VecGetKokkosView(x, &xv)); /* read & write vector */
186: Kokkos::parallel_for(
187: Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, m), KOKKOS_LAMBDA(const PetscCount i) {
188: PetscScalar sum = 0.0;
189: for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
190: xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
191: });
193: PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));
195: /* Add received remote entries */
196: Kokkos::parallel_for(
197: Kokkos::RangePolicy<>(PetscGetKokkosExecutionSpace(), 0, vecmpi->nnz2), KOKKOS_LAMBDA(PetscCount i) {
198: for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
199: });
201: if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
202: else PetscCall(VecRestoreKokkosView(x, &xv));
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
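/*
   The two routines above implement the COO assembly interface for VECMPIKOKKOS: the caller first
   declares all (possibly repeated, possibly off-rank) indices with VecSetPreallocationCOO() and
   then supplies matching values with VecSetValuesCOO(), which may point to host or device memory
   as detected via PetscGetMemType(). A minimal sketch with illustrative indices/values (x is
   assumed to be an existing VECMPIKOKKOS vector with its layout set):

     PetscInt    coo_i[] = {0, 1, 1}; // global indices; repeated and off-rank entries are allowed
     PetscScalar coo_v[] = {1.0, 2.0, 3.0};

     PetscCall(VecSetPreallocationCOO(x, 3, coo_i));
     PetscCall(VecSetValuesCOO(x, coo_v, ADD_VALUES)); // or INSERT_VALUES
*/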
206: // Shared by all VecCreate/Duplicate routines for VecMPIKokkos
207: static PetscErrorCode VecCreate_MPIKokkos_Common(Vec v)
208: {
209: PetscFunctionBegin;
210: v->boundtocpu = PetscDefined(HAVE_KOKKOS_WITHOUT_GPU) ? PETSC_TRUE : PETSC_FALSE; // VECKOKKOS does not yet support CPU binding, but when Kokkos has no GPU backend we deem the vector bound to the CPU.
211: v->ops->bindtocpu = VecBindToCPU_SeqKokkos;
212: v->ops->abs = VecAbs_SeqKokkos;
213: v->ops->reciprocal = VecReciprocal_SeqKokkos;
214: v->ops->pointwisemult = VecPointwiseMult_SeqKokkos;
215: v->ops->setrandom = VecSetRandom_SeqKokkos;
216: v->ops->dotnorm2 = VecDotNorm2_MPIKokkos;
217: v->ops->waxpy = VecWAXPY_SeqKokkos;
218: v->ops->norm = VecNorm_MPIKokkos;
219: v->ops->min = VecMin_MPIKokkos;
220: v->ops->max = VecMax_MPIKokkos;
221: v->ops->sum = VecSum_SeqKokkos;
222: v->ops->shift = VecShift_SeqKokkos;
223: v->ops->scale = VecScale_SeqKokkos;
224: v->ops->copy = VecCopy_SeqKokkos;
225: v->ops->set = VecSet_SeqKokkos;
226: v->ops->swap = VecSwap_SeqKokkos;
227: v->ops->axpy = VecAXPY_SeqKokkos;
228: v->ops->axpby = VecAXPBY_SeqKokkos;
229: v->ops->maxpy = VecMAXPY_SeqKokkos;
230: v->ops->aypx = VecAYPX_SeqKokkos;
231: v->ops->axpbypcz = VecAXPBYPCZ_SeqKokkos;
232: v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
233: v->ops->placearray = VecPlaceArray_SeqKokkos;
234: v->ops->replacearray = VecReplaceArray_SeqKokkos;
235: v->ops->resetarray = VecResetArray_SeqKokkos;
237: v->ops->dot = VecDot_MPIKokkos;
238: v->ops->tdot = VecTDot_MPIKokkos;
239: v->ops->mdot = VecMDot_MPIKokkos;
240: v->ops->mtdot = VecMTDot_MPIKokkos;
242: v->ops->dot_local = VecDot_SeqKokkos;
243: v->ops->tdot_local = VecTDot_SeqKokkos;
244: v->ops->mdot_local = VecMDot_SeqKokkos;
245: v->ops->mtdot_local = VecMTDot_SeqKokkos;
247: v->ops->norm_local = VecNorm_SeqKokkos;
248: v->ops->duplicate = VecDuplicate_MPIKokkos;
249: v->ops->destroy = VecDestroy_MPIKokkos;
250: v->ops->getlocalvector = VecGetLocalVector_SeqKokkos;
251: v->ops->restorelocalvector = VecRestoreLocalVector_SeqKokkos;
252: v->ops->getlocalvectorread = VecGetLocalVectorRead_SeqKokkos;
253: v->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqKokkos;
254: v->ops->getarraywrite = VecGetArrayWrite_SeqKokkos;
255: v->ops->getarray = VecGetArray_SeqKokkos;
256: v->ops->restorearray = VecRestoreArray_SeqKokkos;
257: v->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqKokkos;
258: v->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqKokkos;
259: v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
260: v->ops->getsubvector = VecGetSubVector_MPIKokkos;
261: v->ops->restoresubvector = VecRestoreSubVector_SeqKokkos;
263: v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
264: v->ops->setvaluescoo = VecSetValuesCOO_MPIKokkos;
266: v->ops->errorwnorm = VecErrorWeightedNorms_MPIKokkos;
268: v->offloadmask = PETSC_OFFLOAD_KOKKOS; // Mark this as a VECKOKKOS; we use this flag as a cheap VECKOKKOS test.
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: PETSC_INTERN PetscErrorCode VecConvert_MPI_MPIKokkos_inplace(Vec v)
273: {
274: Vec_MPI *vecmpi;
276: PetscFunctionBegin;
277: PetscCall(PetscKokkosInitializeCheck());
278: PetscCall(PetscLayoutSetUp(v->map));
279: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
280: PetscCall(VecCreate_MPIKokkos_Common(v));
281: PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
282: vecmpi = static_cast<Vec_MPI *>(v->data);
283: if (vecmpi->localrep) { // It is a ghost vector
284: Vec local = vecmpi->localrep;
285: Vec_Kokkos *veckok;
287: PetscCall(VecConvert_Seq_SeqKokkos_inplace(local, NULL));
288: veckok = static_cast<Vec_Kokkos *>(local->spptr);
289: // TODO: can we subview on veckok->v_dual?
290: PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, veckok->v_dual.view_host().data(), veckok->v_dual.view_device().data()));
291: } else PetscCallCXX(v->spptr = new Vec_Kokkos(v->map->n, vecmpi->array, NULL));
292: PetscFunctionReturn(PETSC_SUCCESS);
293: }
295: // Duplicate w into m VECMPIKOKKOS vectors backed by one contiguous allocation, enabling GEMV-based MDot/MAXPY
296: static PetscErrorCode VecDuplicateVecs_MPIKokkos_GEMV(Vec w, PetscInt m, Vec *V[])
297: {
298: PetscInt64 lda; // use 64-bit as we will do "m * lda"
299: PetscScalar *array_h, *array_d;
300: PetscLayout map;
301: Vec_MPI *wmpi = (Vec_MPI *)w->data;
302: PetscScalarKokkosDualView w_dual;
304: PetscFunctionBegin;
305: PetscCall(PetscKokkosInitializeCheck()); // as we'll allocate Kokkos device memory
306: if (wmpi->nghost) { // we currently only do the GEMV optimization for vectors without ghosts
307: w->ops->duplicatevecs = VecDuplicateVecs_Default;
308: PetscCall(VecDuplicateVecs(w, m, V));
309: } else {
310: PetscCall(PetscMalloc1(m, V));
311: PetscCall(VecGetLayout(w, &map));
312: PetscCall(VecGetLocalSizeAligned(w, 64, &lda)); // store in lda the local size rounded up to 64-byte alignment
314: // See comments in VecCreate_SeqKokkos() on why we use DualView to allocate the memory
315: PetscCallCXX(w_dual = PetscScalarKokkosDualView("VecDuplicateVecs", m * lda)); // Kokkos initializes w_dual to zero
317: // create the m vectors with raw arrays
318: array_h = w_dual.view_host().data();
319: array_d = w_dual.view_device().data();
320: for (PetscInt i = 0; i < m; i++) {
321: Vec v;
322: PetscCall(VecCreateMPIKokkosWithLayoutAndArrays_Private(map, &array_h[i * lda], &array_d[i * lda], &v));
323: PetscCallCXX(static_cast<Vec_Kokkos *>(v->spptr)->v_dual.modify_host()); // as only array_h was initialized
324: PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
325: PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
326: v->ops[0] = w->ops[0];
327: v->stash.donotstash = w->stash.donotstash;
328: v->stash.ignorenegidx = w->stash.ignorenegidx;
329: v->stash.bs = w->stash.bs;
330: v->bstash.bs = w->bstash.bs;
331: (*V)[i] = v;
332: }
334: // let the first vector own the raw arrays, so when it is destroyed it will free the arrays
335: if (m) {
336: Vec v = (*V)[0];
338: static_cast<Vec_Kokkos *>(v->spptr)->w_dual = w_dual; // stash the memory
339: // Disable replacearray on the first vector, since freeing its memory would also free the others in the group.
340: // Replacing the arrays of the other vectors is fine, as they do not own their arrays.
341: if (m > 1) v->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
342: }
343: }
344: PetscFunctionReturn(PETSC_SUCCESS);
345: }
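/*
   A minimal usage sketch of the grouped duplication above (w is assumed to be an existing,
   non-ghosted VECMPIKOKKOS vector). The m duplicates share one contiguous allocation, which is
   what allows VecMDot()/VecMAXPY() to be mapped to GEMV later on:

     Vec         *V;
     PetscScalar  z[4];

     PetscCall(VecDuplicateVecs(w, 4, &V)); // dispatches to VecDuplicateVecs_MPIKokkos_GEMV
     PetscCall(VecMDot(w, 4, V, z));
     PetscCall(VecDestroyVecs(4, &V)); // the first vector frees the shared arrays
*/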
347: /*MC
348: VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos
350: Options Database Keys:
351: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()
353: Level: beginner
355: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
356: M*/
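/*
   A minimal creation sketch for this type, set programmatically rather than through
   -vec_type mpikokkos:

     Vec x;

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
     PetscCall(VecSetType(x, VECMPIKOKKOS));
     PetscCall(VecDestroy(&x));
*/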
357: PetscErrorCode VecCreate_MPIKokkos(Vec v)
358: {
359: PetscBool mdot_use_gemv = PETSC_TRUE;
360: PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false, as we saw poor performance with vendor GEMV implementations on tall skinny matrices.
361: PetscScalarKokkosDualView v_dual;
363: PetscFunctionBegin;
364: PetscCall(PetscKokkosInitializeCheck());
365: PetscCall(PetscLayoutSetUp(v->map));
367: PetscCallCXX(v_dual = PetscScalarKokkosDualView("v_dual", v->map->n)); // Kokkos initializes v_dual to zero
368: PetscCall(VecCreate_MPI_Private(v, PETSC_FALSE, 0, v_dual.view_host().data()));
370: PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
371: PetscCall(VecCreate_MPIKokkos_Common(v));
372: PetscCheck(!v->spptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "v->spptr not NULL");
373: PetscCallCXX(v->spptr = new Vec_Kokkos(v_dual));
374: PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
375: PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
377: // allocate multiple vectors together
378: if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPIKokkos_GEMV;
380: if (mdot_use_gemv) {
381: v->ops[0].mdot = VecMDot_MPIKokkos_GEMV;
382: v->ops[0].mtdot = VecMTDot_MPIKokkos_GEMV;
383: v->ops[0].mdot_local = VecMDot_SeqKokkos_GEMV;
384: v->ops[0].mtdot_local = VecMTDot_SeqKokkos_GEMV;
385: }
387: if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_SeqKokkos_GEMV;
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
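/*
   The two options queried above can be toggled at run time, e.g. (the executable name is
   illustrative):

     ./app -vec_type mpikokkos -vec_mdot_use_gemv 0 -vec_maxpy_use_gemv 1
*/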
391: // Create a VECMPIKOKKOS with layout and arrays
392: PetscErrorCode VecCreateMPIKokkosWithLayoutAndArrays_Private(PetscLayout map, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
393: {
394: Vec w;
396: PetscFunctionBegin;
397: if (map->n > 0) PetscCheck(darray, map->comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
398: #if defined(KOKKOS_ENABLE_UNIFIED_MEMORY)
399: PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
400: #endif
401: PetscCall(VecCreateMPIWithLayoutAndArray_Private(map, harray, &w));
402: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); // Change the type name to VECMPIKOKKOS
403: PetscCall(VecCreate_MPIKokkos_Common(w));
404: PetscCallCXX(w->spptr = new Vec_Kokkos(map->n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
405: *v = w;
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }
409: /*@C
410: VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
411: where the user provides the GPU array space to store the vector values.
413: Collective
415: Input Parameters:
416: + comm - the MPI communicator to use
417: . bs - block size, same meaning as VecSetBlockSize()
418: . n - local vector length, cannot be PETSC_DECIDE
419: . N - global vector length (or PETSC_DECIDE to have it calculated)
420: - darray - the user provided GPU array to store the vector values
422: Output Parameter:
423: . v - the vector
425: Notes:
426: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
427: same type as an existing vector.
429: If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
430: at a later stage to SET the array for storing the vector values.
432: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
433: The user should not free the array until the vector is destroyed.
435: Level: intermediate
437: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
438: `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
439: `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
440: @*/
441: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
442: {
443: Vec w;
444: Vec_Kokkos *veckok;
445: Vec_MPI *vecmpi;
446: PetscScalar *harray;
448: PetscFunctionBegin;
449: PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
450: PetscCall(PetscKokkosInitializeCheck());
451: PetscCall(PetscSplitOwnership(comm, &n, &N));
452: PetscCall(VecCreate(comm, &w));
453: PetscCall(VecSetSizes(w, n, N));
454: PetscCall(VecSetBlockSize(w, bs));
455: PetscCall(PetscLayoutSetUp(w->map));
457: if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) {
458: harray = const_cast<PetscScalar *>(darray);
459: } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */
461: PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build an MPI vector with the provided host data */
462: vecmpi = static_cast<Vec_MPI *>(w->data);
464: if (!std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */
466: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
467: PetscCall(VecCreate_MPIKokkos_Common(w));
468: veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
469: veckok->v_dual.modify_device(); /* Mark the device side as modified */
470: w->spptr = static_cast<void *>(veckok);
471: *v = w;
472: PetscFunctionReturn(PETSC_SUCCESS);
473: }
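/*
   A minimal usage sketch for VecCreateMPIKokkosWithArray(); the Kokkos View xd is just an
   illustrative way to obtain device memory holding at least n PetscScalars:

     PetscInt                    n = 10;
     Kokkos::View<PetscScalar *> xd("xd", n); // caller-provided device memory
     Vec                         x;

     PetscCall(VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, xd.data(), &x));
     PetscCall(VecDestroy(&x)); // PETSc does not free the caller's device array
*/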
475: /*
476: VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
477: with user-provided arrays on host and device.
479: Collective
481: Input Parameters:
482: + comm - the communicator
483: . bs - the block size
484: . n - the local vector length
485: . N - the global vector length
486: . harray - host memory where the vector elements are to be stored
487: - darray - device memory where the vector elements are to be stored
489: Output Parameter:
490: . v - the vector
492: Notes:
493: If there is no device, then harray and darray must be the same.
494: If n is not zero, then harray and darray must be allocated.
495: After the call, the created vector is assumed to be in a synchronized state, i.e.,
496: harray and darray are assumed to contain the same data.
498: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
499: The user should not free the array until the vector is destroyed.
500: */
501: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
502: {
503: Vec w;
505: PetscFunctionBegin;
506: PetscCall(PetscKokkosInitializeCheck());
507: if (n) {
508: PetscAssertPointer(harray, 5);
509: PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
510: }
511: if (std::is_same<DefaultMemorySpace, HostMirrorMemorySpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
512: PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
513: PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change the type name to VECMPIKOKKOS */
514: PetscCall(VecCreate_MPIKokkos_Common(w));
515: PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
516: *v = w;
517: PetscFunctionReturn(PETSC_SUCCESS);
518: }
520: /*MC
521: VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos
523: Options Database Keys:
524: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()
526: Level: beginner
528: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
529: M*/
530: PetscErrorCode VecCreate_Kokkos(Vec v)
531: {
532: PetscMPIInt size;
534: PetscFunctionBegin;
535: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
536: if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
537: else PetscCall(VecSetType(v, VECMPIKOKKOS));
538: PetscFunctionReturn(PETSC_SUCCESS);
539: }
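/*
   VecCreate_Kokkos() is what makes "-vec_type kokkos" usable on any communicator size: it forwards
   to VECSEQKOKKOS on one rank and to VECMPIKOKKOS otherwise. A minimal sketch:

     Vec x;

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
     PetscCall(VecSetFromOptions(x)); // run with -vec_type kokkos
     PetscCall(VecDestroy(&x));
*/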