Actual source code: mpiviennacl.cxx
/*
   This file contains routines for parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
. -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()
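
   Example Usage:
   A minimal sketch (the variable name and global size N are illustrative placeholders, not part of this file):
.vb
   Vec v;
   VecCreate(PETSC_COMM_WORLD, &v);
   VecSetSizes(v, PETSC_DECIDE, N);
   VecSetFromOptions(v); /* selects VECVIENNACL when run with -vec_type viennacl */
.ve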
   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`
M*/
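
/* Release the device-side (ViennaCL) storage attached to the vector, then destroy the underlying MPI vector */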
static PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  PetscFunctionBegin;
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL *)v->spptr;
    }
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
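
/* Compute the requested norm of the local (ViennaCL) part, then combine the per-rank results with an MPI reduction */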
static PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
{
  PetscReal sum, work = 0.0;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, &work));
    work *= work;
    PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, &work));
    /* Find the global sum */
    PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work));
    /* Find the global max */
    PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin)));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, temp));
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, temp + 1));
    temp[1] = temp[1] * temp[1];
    PetscCallMPI(MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
    z[1] = PetscSqrtReal(z[1]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
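
/* The dot products below share the same pattern: compute the local contribution with the
   sequential ViennaCL kernel, then sum the per-rank results with MPIU_Allreduce() */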
static PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  PetscFunctionBegin;
  PetscCall(VecDot_SeqViennaCL(xin, yin, &work));
  PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  *z = sum;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  PetscFunctionBegin;
  PetscCall(VecTDot_SeqViennaCL(xin, yin, &work));
  PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  *z = sum;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  PetscFunctionBegin;
  if (nv > 128) PetscCall(PetscMalloc1(nv, &work));
  PetscCall(VecMDot_SeqViennaCL(xin, nv, y, work));
  PetscCallMPI(MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  if (nv > 128) PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()
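
   Example Usage:
   A minimal sketch that selects the type explicitly (the variable name and local size n are illustrative placeholders, not part of this file):
.vb
   Vec v;
   VecCreate(PETSC_COMM_WORLD, &v);
   VecSetSizes(v, n, PETSC_DECIDE);
   VecSetType(v, VECMPIVIENNACL);
.ve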
   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
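
/* Duplicate the layout, ghosting, local representation, and operation table of an existing VECMPIVIENNACL vector */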
static PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
{
  Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecCreate(PetscObjectComm((PetscObject)win), v));
  PetscCall(PetscLayoutReference(win->map, &(*v)->map));

  PetscCall(VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, 0));
  vw           = (Vec_MPI *)(*v)->data;
  (*v)->ops[0] = win->ops[0];

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    PetscCall(VecGetArray(*v, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep));
    vw->localrep->ops[0] = w->localrep->ops[0];
    PetscCall(VecRestoreArray(*v, &array));
    vw->localupdate = w->localupdate;
    if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscCall(PetscObjectChangeTypeName((PetscObject)*v, VECMPIVIENNACL));

  PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar work[2], sum[2];

  PetscFunctionBegin;
  PetscCall(VecDotNorm2_SeqViennaCL(s, t, work, work + 1));
  PetscCallMPI(MPIU_Allreduce((void *)&work, (void *)&sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s)));
  *dp = sum[0];
  *nm = sum[1];
  PetscFunctionReturn(PETSC_SUCCESS);
}
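
/* Switch the vector's operation table between the host (CPU) implementations and the ViennaCL (device) implementations */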
static PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
{
  PetscFunctionBegin;
  vv->boundtocpu = bind;

  if (bind) {
    PetscCall(VecViennaCLCopyFromGPU(vv));
    vv->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2 = NULL;
    vv->ops->waxpy = VecWAXPY_Seq;
    vv->ops->dot = VecDot_MPI;
    vv->ops->mdot = VecMDot_MPI;
    vv->ops->tdot = VecTDot_MPI;
    vv->ops->norm = VecNorm_MPI;
    vv->ops->scale = VecScale_Seq;
    vv->ops->copy = VecCopy_Seq;
    vv->ops->set = VecSet_Seq;
    vv->ops->swap = VecSwap_Seq;
    vv->ops->axpy = VecAXPY_Seq;
    vv->ops->axpby = VecAXPBY_Seq;
    vv->ops->maxpy = VecMAXPY_Seq;
    vv->ops->aypx = VecAYPX_Seq;
    vv->ops->axpbypcz = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult = VecPointwiseMult_Seq;
    vv->ops->setrandom = VecSetRandom_Seq;
    vv->ops->placearray = VecPlaceArray_Seq;
    vv->ops->replacearray = VecReplaceArray_Seq;
    vv->ops->resetarray = VecResetArray_Seq;
    vv->ops->dot_local = VecDot_Seq;
    vv->ops->tdot_local = VecTDot_Seq;
    vv->ops->norm_local = VecNorm_Seq;
    vv->ops->mdot_local = VecMDot_Seq;
    vv->ops->pointwisedivide = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector = NULL;
    vv->ops->restorelocalvector = NULL;
    vv->ops->getlocalvectorread = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite = NULL;
  } else {
    vv->ops->dotnorm2 = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate = VecDuplicate_MPIViennaCL;
    vv->ops->dot = VecDot_MPIViennaCL;
    vv->ops->mdot = VecMDot_MPIViennaCL;
    vv->ops->tdot = VecTDot_MPIViennaCL;
    vv->ops->norm = VecNorm_MPIViennaCL;
    vv->ops->scale = VecScale_SeqViennaCL;
    vv->ops->copy = VecCopy_SeqViennaCL;
    vv->ops->set = VecSet_SeqViennaCL;
    vv->ops->swap = VecSwap_SeqViennaCL;
    vv->ops->axpy = VecAXPY_SeqViennaCL;
    vv->ops->axpby = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy = VecMAXPY_SeqViennaCL;
    vv->ops->aypx = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local = VecDot_SeqViennaCL;
    vv->ops->tdot_local = VecTDot_SeqViennaCL;
    vv->ops->norm_local = VecNorm_SeqViennaCL;
    vv->ops->mdot_local = VecMDot_SeqViennaCL;
    vv->ops->destroy = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray = VecGetArray_SeqViennaCL;
    vv->ops->restorearray = VecRestoreArray_SeqViennaCL;
  }
  vv->ops->duplicatevecs = VecDuplicateVecs_Default;
  PetscFunctionReturn(PETSC_SUCCESS);
}
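
/* Registered creation routine for VECMPIVIENNACL: allocate the device and host storage and zero both copies */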
PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(vv->map));
  PetscCall(VecViennaCLAllocateCheck(vv));
  PetscCall(VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)vv->spptr)->GPUarray));
  PetscCall(VecViennaCLAllocateCheckHost(vv));
  PetscCall(VecSet(vv, 0.0));
  PetscCall(VecSet_Seq(vv, 0.0));
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  PetscFunctionReturn(PETSC_SUCCESS);
}
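
/* Dispatch the generic VECVIENNACL type to VECSEQVIENNACL or VECMPIVIENNACL based on the communicator size */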
PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) {
    PetscCall(VecSetType(v, VECSEQVIENNACL));
  } else {
    PetscCall(VecSetType(v, VECMPIVIENNACL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+ comm  - the MPI communicator to use
. bs    - block size, same meaning as VecSetBlockSize()
. n     - local vector length, cannot be PETSC_DECIDE
. N     - global vector length (or PETSC_DECIDE to have it calculated)
- array - the user-provided GPU array to store the vector values

   Output Parameter:
. vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.
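
   Example Usage:
   A minimal sketch, assuming `gpuvec` points to a ViennaCL vector holding the local entries of length `n` (these names are illustrative placeholders, not part of this interface):
.vb
   Vec v;
   VecCreateMPIViennaCLWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, gpuvec, &v);
   /* ... use v ... */
   VecDestroy(&v); /* the ViennaCL array gpuvec is NOT freed here */
.ve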
   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`
@*/
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv)
{
  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, vv));
  PetscCall(VecSetSizes(*vv, n, N));
  PetscCall(VecSetBlockSize(*vv, bs));
  PetscCall(VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+ comm        - the MPI communicator to use
. bs          - block size, same meaning as VecSetBlockSize()
. n           - local vector length, cannot be PETSC_DECIDE
. N           - global vector length (or PETSC_DECIDE to have it calculated)
. cpuarray    - the user-provided CPU array to store the vector values
- viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
. vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.
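
   Example Usage:
   A minimal sketch, assuming a host array `harray` and a ViennaCL vector pointed to by `gpuvec` that already hold identical values (these names are illustrative placeholders, not part of this interface):
.vb
   Vec v;
   VecCreateMPIViennaCLWithArrays(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, harray, gpuvec, &v);
.ve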
   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
          `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
          `VecViennaCLAllocateCheckHost()`
@*/
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv));
  if (cpuarray && viennaclvec) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
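
/* Shared constructor: set up the host MPI vector, install the ViennaCL operation table, and either attach the
   caller-provided device array or (optionally) allocate and zero device and host storage */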
PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  PetscFunctionBegin;
  PetscCall(VecCreate_MPI_Private(vv, PETSC_FALSE, 0, 0));
  PetscCall(PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL));

  PetscCall(VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE));
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    PetscCall(VecViennaCLAllocateCheck(vv));
    PetscCall(VecViennaCLAllocateCheckHost(vv));
    PetscCall(VecSet(vv, 0.0));
    PetscCall(VecSet_Seq(vv, 0.0));
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
    vecviennacl                     = (Vec_ViennaCL *)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray           = (ViennaCLVector *)array;
    vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}