Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
21: PetscFunctionBegin;
22: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
23: *a = 0;
24: PetscCall(VecViennaCLCopyToGPU(v));
25: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
26: ViennaCLWaitForGPU();
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
31: {
32: PetscFunctionBegin;
33: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
34: v->offloadmask = PETSC_OFFLOAD_GPU;
36: PetscCall(PetscObjectStateIncrease((PetscObject)v));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
41: {
42: PetscFunctionBegin;
43: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
44: *a = 0;
45: PetscCall(VecViennaCLCopyToGPU(v));
46: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
47: ViennaCLWaitForGPU();
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
52: {
53: PetscFunctionBegin;
54: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
59: {
60: PetscFunctionBegin;
61: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
62: *a = 0;
63: PetscCall(VecViennaCLAllocateCheck(v));
64: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
65: ViennaCLWaitForGPU();
66: PetscFunctionReturn(PETSC_SUCCESS);
67: }
69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
70: {
71: PetscFunctionBegin;
72: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
73: v->offloadmask = PETSC_OFFLOAD_GPU;
75: PetscCall(PetscObjectStateIncrease((PetscObject)v));
76: PetscFunctionReturn(PETSC_SUCCESS);
77: }
79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
80: {
81: char string[20];
82: PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
84: PetscFunctionBegin;
85: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
86: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
87: if (flg) {
88: try {
89: PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
90: PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
91: PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));
93: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
94: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
95: #if defined(PETSC_HAVE_CUDA)
96: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
97: #endif
98: #if defined(PETSC_HAVE_OPENCL)
99: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102: } catch (std::exception const &ex) {
103: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104: }
105: }
107: #if defined(PETSC_HAVE_OPENCL)
108: /* ViennaCL OpenCL device type configuration */
109: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110: if (flg) {
111: try {
112: PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
115: PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
118: PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120: } catch (std::exception const &ex) {
121: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122: }
123: }
124: #endif
126: /* Print available backends */
127: PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128: if (flg) {
129: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
143: /* Print selected backends */
144: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157: }
158: PetscFunctionReturn(PETSC_SUCCESS);
159: }
161: /*
162: Allocates space for the vector array on the Host if it does not exist.
163: Does NOT change the PetscViennaCLFlag for the vector
164: Does NOT zero the ViennaCL array
165: */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168: PetscScalar *array;
169: Vec_Seq *s;
170: PetscInt n = v->map->n;
172: PetscFunctionBegin;
173: s = (Vec_Seq *)v->data;
174: PetscCall(VecViennaCLAllocateCheck(v));
175: if (s->array == 0) {
176: PetscCall(PetscMalloc1(n, &array));
177: s->array = array;
178: s->array_allocated = array;
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: /*
184: Allocates space for the vector array on the GPU if it does not exist.
185: Does NOT change the PetscViennaCLFlag for the vector
186: Does NOT zero the ViennaCL array
188: */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191: PetscFunctionBegin;
192: if (!v->spptr) {
193: try {
194: v->spptr = new Vec_ViennaCL;
195: ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196: ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
198: } catch (std::exception const &ex) {
199: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200: }
201: }
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208: PetscFunctionBegin;
209: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210: PetscCall(VecViennaCLAllocateCheck(v));
211: if (v->map->n > 0) {
212: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214: try {
215: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216: viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217: ViennaCLWaitForGPU();
218: } catch (std::exception const &ex) {
219: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220: }
221: PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223: v->offloadmask = PETSC_OFFLOAD_BOTH;
224: }
225: }
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: /*
230: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234: PetscFunctionBegin;
235: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236: PetscCall(VecViennaCLAllocateCheckHost(v));
237: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239: try {
240: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241: viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242: ViennaCLWaitForGPU();
243: } catch (std::exception const &ex) {
244: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245: }
246: PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248: v->offloadmask = PETSC_OFFLOAD_BOTH;
249: }
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256: PetscScalar *ya;
257: const PetscScalar *xa;
259: PetscFunctionBegin;
260: PetscCall(VecViennaCLAllocateCheckHost(xin));
261: PetscCall(VecViennaCLAllocateCheckHost(yin));
262: if (xin != yin) {
263: PetscCall(VecGetArrayRead(xin, &xa));
264: PetscCall(VecGetArray(yin, &ya));
265: PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266: PetscCall(VecRestoreArrayRead(xin, &xa));
267: PetscCall(VecRestoreArray(yin, &ya));
268: }
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274: PetscInt n = xin->map->n, i;
275: PetscScalar *xx;
277: PetscFunctionBegin;
278: PetscCall(VecGetArray(xin, &xx));
279: for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280: PetscCall(VecRestoreArray(xin, &xx));
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286: Vec_Seq *vs = (Vec_Seq *)v->data;
288: PetscFunctionBegin;
289: PetscCall(PetscObjectSAWsViewOff(v));
290: PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291: if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
292: PetscCall(PetscFree(vs));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297: {
298: Vec_Seq *v = (Vec_Seq *)vin->data;
300: PetscFunctionBegin;
301: v->array = v->unplacedarray;
302: v->unplacedarray = 0;
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: /*MC
307: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
309: Options Database Keys:
310: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
312: Level: beginner
314: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315: M*/
317: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318: {
319: const ViennaCLVector *xgpu;
320: ViennaCLVector *ygpu;
322: PetscFunctionBegin;
323: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324: PetscCall(VecViennaCLGetArray(yin, &ygpu));
325: PetscCall(PetscLogGpuTimeBegin());
326: try {
327: if (alpha != 0.0 && xin->map->n > 0) {
328: *ygpu = *xgpu + alpha * *ygpu;
329: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330: } else {
331: *ygpu = *xgpu;
332: }
333: ViennaCLWaitForGPU();
334: } catch (std::exception const &ex) {
335: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336: }
337: PetscCall(PetscLogGpuTimeEnd());
338: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340: PetscFunctionReturn(PETSC_SUCCESS);
341: }
343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344: {
345: const ViennaCLVector *xgpu;
346: ViennaCLVector *ygpu;
348: PetscFunctionBegin;
349: if (alpha != 0.0 && xin->map->n > 0) {
350: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351: PetscCall(VecViennaCLGetArray(yin, &ygpu));
352: PetscCall(PetscLogGpuTimeBegin());
353: try {
354: *ygpu += alpha * *xgpu;
355: ViennaCLWaitForGPU();
356: } catch (std::exception const &ex) {
357: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358: }
359: PetscCall(PetscLogGpuTimeEnd());
360: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363: }
364: PetscFunctionReturn(PETSC_SUCCESS);
365: }
367: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368: {
369: const ViennaCLVector *xgpu, *ygpu;
370: ViennaCLVector *wgpu;
372: PetscFunctionBegin;
373: if (xin->map->n > 0) {
374: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377: PetscCall(PetscLogGpuTimeBegin());
378: try {
379: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380: ViennaCLWaitForGPU();
381: } catch (std::exception const &ex) {
382: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383: }
384: PetscCall(PetscLogGpuTimeEnd());
385: PetscCall(PetscLogGpuFlops(win->map->n));
386: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389: }
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
393: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394: {
395: const ViennaCLVector *xgpu, *ygpu;
396: ViennaCLVector *wgpu;
398: PetscFunctionBegin;
399: if (alpha == 0.0 && xin->map->n > 0) {
400: PetscCall(VecCopy_SeqViennaCL(yin, win));
401: } else {
402: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
403: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
404: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
405: PetscCall(PetscLogGpuTimeBegin());
406: if (alpha == 1.0) {
407: try {
408: *wgpu = *ygpu + *xgpu;
409: } catch (std::exception const &ex) {
410: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
411: }
412: PetscCall(PetscLogGpuFlops(win->map->n));
413: } else if (alpha == -1.0) {
414: try {
415: *wgpu = *ygpu - *xgpu;
416: } catch (std::exception const &ex) {
417: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
418: }
419: PetscCall(PetscLogGpuFlops(win->map->n));
420: } else {
421: try {
422: *wgpu = *ygpu + alpha * *xgpu;
423: } catch (std::exception const &ex) {
424: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
425: }
426: PetscCall(PetscLogGpuFlops(2 * win->map->n));
427: }
428: ViennaCLWaitForGPU();
429: PetscCall(PetscLogGpuTimeEnd());
430: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
431: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
432: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
433: }
434: PetscFunctionReturn(PETSC_SUCCESS);
435: }
437: /*
438: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
439: *
440: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
441: * hence there is an iterated application of these until the final result is obtained
442: */
443: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
444: {
445: PetscInt j;
447: PetscFunctionBegin;
448: for (j = 0; j < nv; ++j) {
449: if (j + 1 < nv) {
450: PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
451: ++j;
452: } else {
453: PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
454: }
455: }
456: ViennaCLWaitForGPU();
457: PetscFunctionReturn(PETSC_SUCCESS);
458: }
460: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
461: {
462: const ViennaCLVector *xgpu, *ygpu;
464: PetscFunctionBegin;
465: if (xin->map->n > 0) {
466: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
467: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
468: PetscCall(PetscLogGpuTimeBegin());
469: try {
470: *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
471: } catch (std::exception const &ex) {
472: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
473: }
474: ViennaCLWaitForGPU();
475: PetscCall(PetscLogGpuTimeEnd());
476: if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
477: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
478: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
479: } else *z = 0.0;
480: PetscFunctionReturn(PETSC_SUCCESS);
481: }
483: /*
484: * Operation z[j] = dot(x, y[j])
485: *
486: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
487: */
488: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
489: {
490: PetscInt n = xin->map->n, i;
491: const ViennaCLVector *xgpu, *ygpu;
492: Vec *yyin = (Vec *)yin;
493: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
495: PetscFunctionBegin;
496: if (xin->map->n > 0) {
497: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
498: for (i = 0; i < nv; i++) {
499: PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
500: ygpu_array[i] = ygpu;
501: }
502: PetscCall(PetscLogGpuTimeBegin());
503: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
504: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
505: viennacl::copy(result.begin(), result.end(), z);
506: for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
507: ViennaCLWaitForGPU();
508: PetscCall(PetscLogGpuTimeEnd());
509: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
510: PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
511: } else {
512: for (i = 0; i < nv; i++) z[i] = 0.0;
513: }
514: PetscFunctionReturn(PETSC_SUCCESS);
515: }
517: static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
518: {
519: PetscFunctionBegin;
520: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
521: PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
522: ViennaCLWaitForGPU();
523: PetscFunctionReturn(PETSC_SUCCESS);
524: }
526: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
527: {
528: ViennaCLVector *xgpu;
530: PetscFunctionBegin;
531: if (xin->map->n > 0) {
532: PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
533: PetscCall(PetscLogGpuTimeBegin());
534: try {
535: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
536: ViennaCLWaitForGPU();
537: } catch (std::exception const &ex) {
538: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539: }
540: PetscCall(PetscLogGpuTimeEnd());
541: PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
542: }
543: PetscFunctionReturn(PETSC_SUCCESS);
544: }
546: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
547: {
548: ViennaCLVector *xgpu;
550: PetscFunctionBegin;
551: if (alpha == 0.0 && xin->map->n > 0) {
552: PetscCall(VecSet_SeqViennaCL(xin, alpha));
553: PetscCall(PetscLogGpuFlops(xin->map->n));
554: } else if (alpha != 1.0 && xin->map->n > 0) {
555: PetscCall(VecViennaCLGetArray(xin, &xgpu));
556: PetscCall(PetscLogGpuTimeBegin());
557: try {
558: *xgpu *= alpha;
559: ViennaCLWaitForGPU();
560: } catch (std::exception const &ex) {
561: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
562: }
563: PetscCall(PetscLogGpuTimeEnd());
564: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
565: PetscCall(PetscLogGpuFlops(xin->map->n));
566: }
567: PetscFunctionReturn(PETSC_SUCCESS);
568: }
570: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
571: {
572: PetscFunctionBegin;
573: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
574: PetscCall(VecDot_SeqViennaCL(xin, yin, z));
575: ViennaCLWaitForGPU();
576: PetscFunctionReturn(PETSC_SUCCESS);
577: }
579: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
580: {
581: const ViennaCLVector *xgpu;
582: ViennaCLVector *ygpu;
584: PetscFunctionBegin;
585: if (xin != yin && xin->map->n > 0) {
586: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
587: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
588: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
589: PetscCall(PetscLogGpuTimeBegin());
590: try {
591: *ygpu = *xgpu;
592: ViennaCLWaitForGPU();
593: } catch (std::exception const &ex) {
594: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595: }
596: PetscCall(PetscLogGpuTimeEnd());
597: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
598: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
600: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
601: /* copy in CPU if we are on the CPU*/
602: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
603: ViennaCLWaitForGPU();
604: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
605: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
606: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
607: /* copy in CPU */
608: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
609: ViennaCLWaitForGPU();
610: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
611: /* copy in GPU */
612: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
613: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
614: PetscCall(PetscLogGpuTimeBegin());
615: try {
616: *ygpu = *xgpu;
617: ViennaCLWaitForGPU();
618: } catch (std::exception const &ex) {
619: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
620: }
621: PetscCall(PetscLogGpuTimeEnd());
622: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
623: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
624: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
625: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
626: default to copy in GPU (this is an arbitrary choice) */
627: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
628: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
629: PetscCall(PetscLogGpuTimeBegin());
630: try {
631: *ygpu = *xgpu;
632: ViennaCLWaitForGPU();
633: } catch (std::exception const &ex) {
634: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
635: }
636: PetscCall(PetscLogGpuTimeEnd());
637: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
638: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
639: } else {
640: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
641: ViennaCLWaitForGPU();
642: }
643: }
644: }
645: PetscFunctionReturn(PETSC_SUCCESS);
646: }
648: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
649: {
650: ViennaCLVector *xgpu, *ygpu;
652: PetscFunctionBegin;
653: if (xin != yin && xin->map->n > 0) {
654: PetscCall(VecViennaCLGetArray(xin, &xgpu));
655: PetscCall(VecViennaCLGetArray(yin, &ygpu));
656: PetscCall(PetscLogGpuTimeBegin());
657: try {
658: viennacl::swap(*xgpu, *ygpu);
659: ViennaCLWaitForGPU();
660: } catch (std::exception const &ex) {
661: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
662: }
663: PetscCall(PetscLogGpuTimeEnd());
664: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
665: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
666: }
667: PetscFunctionReturn(PETSC_SUCCESS);
668: }
670: // y = alpha * x + beta * y
671: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
672: {
673: PetscScalar a = alpha, b = beta;
674: const ViennaCLVector *xgpu;
675: ViennaCLVector *ygpu;
677: PetscFunctionBegin;
678: if (a == 0.0 && xin->map->n > 0) {
679: PetscCall(VecScale_SeqViennaCL(yin, beta));
680: } else if (b == 1.0 && xin->map->n > 0) {
681: PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
682: } else if (a == 1.0 && xin->map->n > 0) {
683: PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
684: } else if (b == 0.0 && xin->map->n > 0) {
685: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
686: PetscCall(VecViennaCLGetArray(yin, &ygpu));
687: PetscCall(PetscLogGpuTimeBegin());
688: try {
689: *ygpu = *xgpu * alpha;
690: ViennaCLWaitForGPU();
691: } catch (std::exception const &ex) {
692: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
693: }
694: PetscCall(PetscLogGpuTimeEnd());
695: PetscCall(PetscLogGpuFlops(xin->map->n));
696: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
697: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
698: } else if (xin->map->n > 0) {
699: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
700: PetscCall(VecViennaCLGetArray(yin, &ygpu));
701: PetscCall(PetscLogGpuTimeBegin());
702: try {
703: *ygpu = *xgpu * alpha + *ygpu * beta;
704: ViennaCLWaitForGPU();
705: } catch (std::exception const &ex) {
706: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
707: }
708: PetscCall(PetscLogGpuTimeEnd());
709: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
710: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
711: PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
712: }
713: PetscFunctionReturn(PETSC_SUCCESS);
714: }
716: /* operation z = alpha * x + beta *y + gamma *z*/
717: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
718: {
719: PetscInt n = zin->map->n;
720: const ViennaCLVector *xgpu, *ygpu;
721: ViennaCLVector *zgpu;
723: PetscFunctionBegin;
724: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
725: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
726: PetscCall(VecViennaCLGetArray(zin, &zgpu));
727: if (alpha == 0.0 && xin->map->n > 0) {
728: PetscCall(PetscLogGpuTimeBegin());
729: try {
730: if (beta == 0.0) {
731: *zgpu = gamma * *zgpu;
732: ViennaCLWaitForGPU();
733: PetscCall(PetscLogGpuFlops(1.0 * n));
734: } else if (gamma == 0.0) {
735: *zgpu = beta * *ygpu;
736: ViennaCLWaitForGPU();
737: PetscCall(PetscLogGpuFlops(1.0 * n));
738: } else {
739: *zgpu = beta * *ygpu + gamma * *zgpu;
740: ViennaCLWaitForGPU();
741: PetscCall(PetscLogGpuFlops(3.0 * n));
742: }
743: } catch (std::exception const &ex) {
744: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
745: }
746: PetscCall(PetscLogGpuTimeEnd());
747: PetscCall(PetscLogGpuFlops(3.0 * n));
748: } else if (beta == 0.0 && xin->map->n > 0) {
749: PetscCall(PetscLogGpuTimeBegin());
750: try {
751: if (gamma == 0.0) {
752: *zgpu = alpha * *xgpu;
753: ViennaCLWaitForGPU();
754: PetscCall(PetscLogGpuFlops(1.0 * n));
755: } else {
756: *zgpu = alpha * *xgpu + gamma * *zgpu;
757: ViennaCLWaitForGPU();
758: PetscCall(PetscLogGpuFlops(3.0 * n));
759: }
760: } catch (std::exception const &ex) {
761: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
762: }
763: PetscCall(PetscLogGpuTimeEnd());
764: } else if (gamma == 0.0 && xin->map->n > 0) {
765: PetscCall(PetscLogGpuTimeBegin());
766: try {
767: *zgpu = alpha * *xgpu + beta * *ygpu;
768: ViennaCLWaitForGPU();
769: } catch (std::exception const &ex) {
770: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
771: }
772: PetscCall(PetscLogGpuTimeEnd());
773: PetscCall(PetscLogGpuFlops(3.0 * n));
774: } else if (xin->map->n > 0) {
775: PetscCall(PetscLogGpuTimeBegin());
776: try {
777: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
778: if (gamma != 1.0) *zgpu *= gamma;
779: *zgpu += alpha * *xgpu + beta * *ygpu;
780: ViennaCLWaitForGPU();
781: } catch (std::exception const &ex) {
782: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
783: }
784: PetscCall(PetscLogGpuTimeEnd());
785: PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
786: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
787: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
788: PetscCall(PetscLogGpuFlops(5.0 * n));
789: }
790: PetscFunctionReturn(PETSC_SUCCESS);
791: }
793: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
794: {
795: PetscInt n = win->map->n;
796: const ViennaCLVector *xgpu, *ygpu;
797: ViennaCLVector *wgpu;
799: PetscFunctionBegin;
800: if (xin->map->n > 0) {
801: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
802: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
803: PetscCall(VecViennaCLGetArray(win, &wgpu));
804: PetscCall(PetscLogGpuTimeBegin());
805: try {
806: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
807: ViennaCLWaitForGPU();
808: } catch (std::exception const &ex) {
809: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
810: }
811: PetscCall(PetscLogGpuTimeEnd());
812: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
813: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
814: PetscCall(VecViennaCLRestoreArray(win, &wgpu));
815: PetscCall(PetscLogGpuFlops(n));
816: }
817: PetscFunctionReturn(PETSC_SUCCESS);
818: }
820: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
821: {
822: PetscInt n = xin->map->n;
823: PetscBLASInt bn;
824: const ViennaCLVector *xgpu;
826: PetscFunctionBegin;
827: if (xin->map->n > 0) {
828: PetscCall(PetscBLASIntCast(n, &bn));
829: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
830: if (type == NORM_2 || type == NORM_FROBENIUS) {
831: PetscCall(PetscLogGpuTimeBegin());
832: try {
833: *z = viennacl::linalg::norm_2(*xgpu);
834: ViennaCLWaitForGPU();
835: } catch (std::exception const &ex) {
836: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837: }
838: PetscCall(PetscLogGpuTimeEnd());
839: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
840: } else if (type == NORM_INFINITY) {
841: PetscCall(PetscLogGpuTimeBegin());
842: try {
843: *z = viennacl::linalg::norm_inf(*xgpu);
844: ViennaCLWaitForGPU();
845: } catch (std::exception const &ex) {
846: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
847: }
848: PetscCall(PetscLogGpuTimeEnd());
849: } else if (type == NORM_1) {
850: PetscCall(PetscLogGpuTimeBegin());
851: try {
852: *z = viennacl::linalg::norm_1(*xgpu);
853: ViennaCLWaitForGPU();
854: } catch (std::exception const &ex) {
855: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
856: }
857: PetscCall(PetscLogGpuTimeEnd());
858: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
859: } else if (type == NORM_1_AND_2) {
860: PetscCall(PetscLogGpuTimeBegin());
861: try {
862: *z = viennacl::linalg::norm_1(*xgpu);
863: *(z + 1) = viennacl::linalg::norm_2(*xgpu);
864: ViennaCLWaitForGPU();
865: } catch (std::exception const &ex) {
866: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
867: }
868: PetscCall(PetscLogGpuTimeEnd());
869: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
870: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
871: }
872: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
873: } else if (type == NORM_1_AND_2) {
874: *z = 0.0;
875: *(z + 1) = 0.0;
876: } else *z = 0.0;
877: PetscFunctionReturn(PETSC_SUCCESS);
878: }
880: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
881: {
882: PetscFunctionBegin;
883: PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
884: xin->offloadmask = PETSC_OFFLOAD_CPU;
885: PetscFunctionReturn(PETSC_SUCCESS);
886: }
888: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
889: {
890: PetscFunctionBegin;
891: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
892: PetscCall(VecViennaCLCopyFromGPU(vin));
893: PetscCall(VecResetArray_SeqViennaCL_Private(vin));
894: vin->offloadmask = PETSC_OFFLOAD_CPU;
895: PetscFunctionReturn(PETSC_SUCCESS);
896: }
898: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
899: {
900: PetscFunctionBegin;
901: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
902: PetscCall(VecViennaCLCopyFromGPU(vin));
903: PetscCall(VecPlaceArray_Seq(vin, a));
904: vin->offloadmask = PETSC_OFFLOAD_CPU;
905: PetscFunctionReturn(PETSC_SUCCESS);
906: }
908: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
909: {
910: PetscFunctionBegin;
911: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
912: PetscCall(VecViennaCLCopyFromGPU(vin));
913: PetscCall(VecReplaceArray_Seq(vin, a));
914: vin->offloadmask = PETSC_OFFLOAD_CPU;
915: PetscFunctionReturn(PETSC_SUCCESS);
916: }
918: /*@C
919: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
921: Collective
923: Input Parameters:
924: + comm - the communicator, should be PETSC_COMM_SELF
925: - n - the vector length
927: Output Parameter:
928: . v - the vector
930: Notes:
931: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
932: same type as an existing vector.
934: Level: intermediate
936: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
937: @*/
938: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
939: {
940: PetscFunctionBegin;
941: PetscCall(VecCreate(comm, v));
942: PetscCall(VecSetSizes(*v, n, n));
943: PetscCall(VecSetType(*v, VECSEQVIENNACL));
944: PetscFunctionReturn(PETSC_SUCCESS);
945: }
947: /*@C
948: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
949: where the user provides the array space to store the vector values.
951: Collective
953: Input Parameters:
954: + comm - the communicator, should be PETSC_COMM_SELF
955: . bs - the block size
956: . n - the vector length
957: - array - viennacl array where the vector elements are to be stored.
959: Output Parameter:
960: . V - the vector
962: Notes:
963: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
964: same type as an existing vector.
966: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
967: at a later stage to SET the array for storing the vector values.
969: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
970: The user should not free the array until the vector is destroyed.
972: Level: intermediate
974: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
975: `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
976: `VecCreateMPIWithArray()`
977: @*/
978: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
979: {
980: PetscMPIInt size;
982: PetscFunctionBegin;
983: PetscCall(VecCreate(comm, V));
984: PetscCall(VecSetSizes(*V, n, n));
985: PetscCall(VecSetBlockSize(*V, bs));
986: PetscCallMPI(MPI_Comm_size(comm, &size));
987: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
988: PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
989: PetscFunctionReturn(PETSC_SUCCESS);
990: }
992: /*@C
993: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
994: the user provides the array space to store the vector values.
996: Collective
998: Input Parameters:
999: + comm - the communicator, should be PETSC_COMM_SELF
1000: . bs - the block size
1001: . n - the vector length
1002: . cpuarray - CPU memory where the vector elements are to be stored.
1003: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1005: Output Parameter:
1006: . V - the vector
1008: Notes:
1009: If both cpuarray and viennaclvec are provided, the caller must ensure that
1010: the provided arrays have identical values.
1012: PETSc does NOT free the provided arrays when the vector is destroyed via
1013: VecDestroy(). The user should not free the array until the vector is
1014: destroyed.
1016: Level: intermediate
1018: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1019: `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1020: `VecViennaCLAllocateCheckHost()`
1021: @*/
1022: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
1023: {
1024: PetscMPIInt size;
1026: PetscFunctionBegin;
1027: PetscCallMPI(MPI_Comm_size(comm, &size));
1028: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
1030: // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1031: PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));
1033: if (cpuarray && viennaclvec) {
1034: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1035: s->array = (PetscScalar *)cpuarray;
1036: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1037: } else if (cpuarray) {
1038: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1039: s->array = (PetscScalar *)cpuarray;
1040: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1041: } else if (viennaclvec) {
1042: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1043: } else {
1044: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1045: }
1046: PetscFunctionReturn(PETSC_SUCCESS);
1047: }
1049: /*@C
1050: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1051: the one provided by the user. This is useful to avoid a copy.
1053: Not Collective
1055: Input Parameters:
1056: + vin - the vector
1057: - a - the ViennaCL vector
1059: Notes:
1060: You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1061: It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1062: the same vector.
1064: Level: intermediate
1066: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1067: `VecCUDAPlaceArray()`,
1069: @*/
1070: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1071: {
1072: PetscFunctionBegin;
1073: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1074: PetscCall(VecViennaCLCopyToGPU(vin));
1075: PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1076: ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1077: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1078: vin->offloadmask = PETSC_OFFLOAD_GPU;
1079: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1080: PetscFunctionReturn(PETSC_SUCCESS);
1081: }
1083: /*@C
1084: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1085: after the use of VecViennaCLPlaceArray().
1087: Not Collective
1089: Input Parameters:
1090: . vin - the vector
1092: Level: developer
1094: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1095: @*/
1096: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1097: {
1098: PetscFunctionBegin;
1099: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1100: PetscCall(VecViennaCLCopyToGPU(vin));
1101: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1102: ((Vec_Seq *)vin->data)->unplacedarray = 0;
1103: vin->offloadmask = PETSC_OFFLOAD_GPU;
1104: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1105: PetscFunctionReturn(PETSC_SUCCESS);
1106: }
1108: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1109: *
1110: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1111: */
1112: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1113: {
1114: PetscFunctionBegin;
1115: PetscCall(VecDot_SeqViennaCL(s, t, dp));
1116: PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1117: *nm *= *nm; //squared norm required
1118: PetscFunctionReturn(PETSC_SUCCESS);
1119: }
1121: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1122: {
1123: PetscFunctionBegin;
1124: PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1125: PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1126: PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1127: PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1128: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1129: PetscFunctionReturn(PETSC_SUCCESS);
1130: }
1132: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1133: {
1134: PetscFunctionBegin;
1135: try {
1136: if (v->spptr) {
1137: delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1138: delete (Vec_ViennaCL *)v->spptr;
1139: }
1140: } catch (char *ex) {
1141: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1142: }
1143: PetscCall(VecDestroy_SeqViennaCL_Private(v));
1144: PetscFunctionReturn(PETSC_SUCCESS);
1145: }
1147: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1148: {
1149: PetscFunctionBegin;
1150: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1151: PetscCall(VecViennaCLCopyFromGPU(v));
1152: } else {
1153: PetscCall(VecViennaCLAllocateCheckHost(v));
1154: }
1155: *a = *((PetscScalar **)v->data);
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
1159: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1160: {
1161: PetscFunctionBegin;
1162: v->offloadmask = PETSC_OFFLOAD_CPU;
1163: PetscFunctionReturn(PETSC_SUCCESS);
1164: }
1166: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1167: {
1168: PetscFunctionBegin;
1169: PetscCall(VecViennaCLAllocateCheckHost(v));
1170: *a = *((PetscScalar **)v->data);
1171: PetscFunctionReturn(PETSC_SUCCESS);
1172: }
1174: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1175: {
1176: PetscFunctionBegin;
1177: V->boundtocpu = flg;
1178: if (flg) {
1179: PetscCall(VecViennaCLCopyFromGPU(V));
1180: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1181: V->ops->dot = VecDot_Seq;
1182: V->ops->norm = VecNorm_Seq;
1183: V->ops->tdot = VecTDot_Seq;
1184: V->ops->scale = VecScale_Seq;
1185: V->ops->copy = VecCopy_Seq;
1186: V->ops->set = VecSet_Seq;
1187: V->ops->swap = VecSwap_Seq;
1188: V->ops->axpy = VecAXPY_Seq;
1189: V->ops->axpby = VecAXPBY_Seq;
1190: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1191: V->ops->pointwisemult = VecPointwiseMult_Seq;
1192: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1193: V->ops->setrandom = VecSetRandom_Seq;
1194: V->ops->dot_local = VecDot_Seq;
1195: V->ops->tdot_local = VecTDot_Seq;
1196: V->ops->norm_local = VecNorm_Seq;
1197: V->ops->mdot_local = VecMDot_Seq;
1198: V->ops->mtdot_local = VecMTDot_Seq;
1199: V->ops->maxpy = VecMAXPY_Seq;
1200: V->ops->mdot = VecMDot_Seq;
1201: V->ops->mtdot = VecMTDot_Seq;
1202: V->ops->aypx = VecAYPX_Seq;
1203: V->ops->waxpy = VecWAXPY_Seq;
1204: V->ops->dotnorm2 = NULL;
1205: V->ops->placearray = VecPlaceArray_Seq;
1206: V->ops->replacearray = VecReplaceArray_Seq;
1207: V->ops->resetarray = VecResetArray_Seq;
1208: V->ops->duplicate = VecDuplicate_Seq;
1209: } else {
1210: V->ops->dot = VecDot_SeqViennaCL;
1211: V->ops->norm = VecNorm_SeqViennaCL;
1212: V->ops->tdot = VecTDot_SeqViennaCL;
1213: V->ops->scale = VecScale_SeqViennaCL;
1214: V->ops->copy = VecCopy_SeqViennaCL;
1215: V->ops->set = VecSet_SeqViennaCL;
1216: V->ops->swap = VecSwap_SeqViennaCL;
1217: V->ops->axpy = VecAXPY_SeqViennaCL;
1218: V->ops->axpby = VecAXPBY_SeqViennaCL;
1219: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1220: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1221: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1222: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1223: V->ops->dot_local = VecDot_SeqViennaCL;
1224: V->ops->tdot_local = VecTDot_SeqViennaCL;
1225: V->ops->norm_local = VecNorm_SeqViennaCL;
1226: V->ops->mdot_local = VecMDot_SeqViennaCL;
1227: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1228: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1229: V->ops->mdot = VecMDot_SeqViennaCL;
1230: V->ops->mtdot = VecMTDot_SeqViennaCL;
1231: V->ops->aypx = VecAYPX_SeqViennaCL;
1232: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1233: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1234: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1235: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1236: V->ops->resetarray = VecResetArray_SeqViennaCL;
1237: V->ops->destroy = VecDestroy_SeqViennaCL;
1238: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1239: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1240: V->ops->getarray = VecGetArray_SeqViennaCL;
1241: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1242: }
1243: V->ops->duplicatevecs = VecDuplicateVecs_Default;
1244: PetscFunctionReturn(PETSC_SUCCESS);
1245: }
1247: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1248: {
1249: PetscMPIInt size;
1251: PetscFunctionBegin;
1252: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1253: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1254: PetscCall(VecCreate_Seq_Private(V, 0));
1255: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1257: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1258: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1260: PetscCall(VecViennaCLAllocateCheck(V));
1261: PetscCall(VecSet_SeqViennaCL(V, 0.0));
1262: PetscFunctionReturn(PETSC_SUCCESS);
1263: }
1265: /*@C
1266: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1268: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1269: invoking clReleaseContext().
1271: Input Parameter:
1272: . v - the vector
1274: Output Parameter:
1275: . ctx - pointer to the underlying CL context
1277: Level: intermediate
1279: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1280: @*/
1281: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1282: {
1283: #if !defined(PETSC_HAVE_OPENCL)
1284: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1285: #else
1287: PetscFunctionBegin;
1288: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1289: const ViennaCLVector *v_vcl;
1290: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1291: try {
1292: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1293: const cl_context ocl_ctx = vcl_ctx.handle().get();
1294: clRetainContext(ocl_ctx);
1295: *ctx = (PETSC_UINTPTR_T)ocl_ctx;
1296: } catch (std::exception const &ex) {
1297: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1298: }
1299: PetscFunctionReturn(PETSC_SUCCESS);
1300: #endif
1301: }
1303: /*@C
1304: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1305: operations of the Vec are enqueued.
1307: Caller should cast (*queue) to (const cl_command_queue). Caller is
1308: responsible for invoking clReleaseCommandQueue().
1310: Input Parameter:
1311: . v - the vector
1313: Output Parameter:
1314: . queue - pointer to the CL command queue
1316: Level: intermediate
1318: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1319: @*/
1320: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1321: {
1322: #if !defined(PETSC_HAVE_OPENCL)
1323: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1324: #else
1325: PetscFunctionBegin;
1326: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1327: const ViennaCLVector *v_vcl;
1328: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1329: try {
1330: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1331: const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1332: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1333: clRetainCommandQueue(ocl_queue);
1334: *queue = (PETSC_UINTPTR_T)ocl_queue;
1335: } catch (std::exception const &ex) {
1336: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1337: }
1338: PetscFunctionReturn(PETSC_SUCCESS);
1339: #endif
1340: }
1342: /*@C
1343: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1345: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1346: invoking clReleaseMemObject().
1348: Input Parameter:
1349: . v - the vector
1351: Output Parameter:
1352: . mem - pointer to the device buffer
1354: Level: intermediate
1356: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1357: @*/
1358: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1359: {
1360: #if !defined(PETSC_HAVE_OPENCL)
1361: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1362: #else
1363: PetscFunctionBegin;
1364: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1365: const ViennaCLVector *v_vcl;
1366: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1367: try {
1368: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1369: clRetainMemObject(ocl_mem);
1370: *mem = (PETSC_UINTPTR_T)ocl_mem;
1371: } catch (std::exception const &ex) {
1372: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1373: }
1374: PetscFunctionReturn(PETSC_SUCCESS);
1375: #endif
1376: }
1378: /*@C
1379: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1381: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1382: invoking clReleaseMemObject().
1384: The device pointer has to be released by calling
1385: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1386: the host will be marked as out of date. A subsequent access of the host data
1387: will thus incur a data transfer from the device to the host.
1389: Input Parameter:
1390: . v - the vector
1392: Output Parameter:
1393: . mem - pointer to the device buffer
1395: Level: intermediate
1397: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1398: @*/
1399: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1400: {
1401: #if !defined(PETSC_HAVE_OPENCL)
1402: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1403: #else
1404: PetscFunctionBegin;
1405: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1406: ViennaCLVector *v_vcl;
1407: PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1408: try {
1409: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1410: clRetainMemObject(ocl_mem);
1411: *mem = (PETSC_UINTPTR_T)ocl_mem;
1412: } catch (std::exception const &ex) {
1413: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1414: }
1415: PetscFunctionReturn(PETSC_SUCCESS);
1416: #endif
1417: }
1419: /*@C
1420: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1421: acquired with VecViennaCLGetCLMemWrite().
1423: This marks the host data as out of date. Subsequent access to the
1424: vector data on the host side with for instance VecGetArray() incurs a
1425: data transfer.
1427: Input Parameter:
1428: . v - the vector
1430: Level: intermediate
1432: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1433: @*/
1434: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1435: {
1436: #if !defined(PETSC_HAVE_OPENCL)
1437: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1438: #else
1439: PetscFunctionBegin;
1440: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1441: PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1442: PetscFunctionReturn(PETSC_SUCCESS);
1443: #endif
1444: }
1446: /*@C
1447: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1449: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1450: invoking clReleaseMemObject().
1452: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1453: Upon restoring the vector data the data on the host will be marked as out of
1454: date. A subsequent access of the host data will thus incur a data transfer
1455: from the device to the host.
1457: Input Parameter:
1458: . v - the vector
1460: Output Parameter:
1461: . mem - pointer to the device buffer
1463: Level: intermediate
1465: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1466: @*/
1467: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1468: {
1469: #if !defined(PETSC_HAVE_OPENCL)
1470: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1471: #else
1472: PetscFunctionBegin;
1473: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1474: ViennaCLVector *v_vcl;
1475: PetscCall(VecViennaCLGetArray(v, &v_vcl));
1476: try {
1477: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1478: clRetainMemObject(ocl_mem);
1479: *mem = (PETSC_UINTPTR_T)ocl_mem;
1480: } catch (std::exception const &ex) {
1481: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1482: }
1483: PetscFunctionReturn(PETSC_SUCCESS);
1484: #endif
1485: }
1487: /*@C
1488: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1489: acquired with VecViennaCLGetCLMem().
1491: This marks the host data as out of date. Subsequent access to the vector
1492: data on the host side with for instance VecGetArray() incurs a data
1493: transfer.
1495: Input Parameter:
1496: . v - the vector
1498: Level: intermediate
1500: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1501: @*/
1502: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1503: {
1504: #if !defined(PETSC_HAVE_OPENCL)
1505: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1506: #else
1507: PetscFunctionBegin;
1508: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1509: PetscCall(VecViennaCLRestoreArray(v, NULL));
1510: PetscFunctionReturn(PETSC_SUCCESS);
1511: #endif
1512: }
1514: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1515: {
1516: Vec_ViennaCL *vecviennacl;
1517: PetscMPIInt size;
1519: PetscFunctionBegin;
1520: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1521: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1522: PetscCall(VecCreate_Seq_Private(V, 0));
1523: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1524: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1525: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1527: if (array) {
1528: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1529: vecviennacl = (Vec_ViennaCL *)V->spptr;
1530: vecviennacl->GPUarray_allocated = 0;
1531: vecviennacl->GPUarray = (ViennaCLVector *)array;
1532: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1533: }
1534: PetscFunctionReturn(PETSC_SUCCESS);
1535: }