Actual source code: vecviennacl.cxx

  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16:   #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 21:   PetscFunctionBegin;
 22:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 23:   *a = 0;
 24:   PetscCall(VecViennaCLCopyToGPU(v));
 25:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 26:   ViennaCLWaitForGPU();
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }

 30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 31: {
 32:   PetscFunctionBegin;
 33:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 34:   v->offloadmask = PETSC_OFFLOAD_GPU;

 36:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }

 40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 41: {
 42:   PetscFunctionBegin;
 43:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 44:   *a = 0;
 45:   PetscCall(VecViennaCLCopyToGPU(v));
 46:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 47:   ViennaCLWaitForGPU();
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 52: {
 53:   PetscFunctionBegin;
 54:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }

 58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 59: {
 60:   PetscFunctionBegin;
 61:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 62:   *a = 0;
 63:   PetscCall(VecViennaCLAllocateCheck(v));
 64:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 65:   ViennaCLWaitForGPU();
 66:   PetscFunctionReturn(PETSC_SUCCESS);
 67: }

 69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 70: {
 71:   PetscFunctionBegin;
 72:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 73:   v->offloadmask = PETSC_OFFLOAD_GPU;

 75:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 76:   PetscFunctionReturn(PETSC_SUCCESS);
 77: }

 79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 80: {
 81:   char      string[20];
 82:   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;

 84:   PetscFunctionBegin;
 85:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 86:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
 87:   if (flg) {
 88:     try {
 89:       PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
 90:       PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
 91:       PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));

 93:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 94:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 95: #if defined(PETSC_HAVE_CUDA)
 96:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 97: #endif
 98: #if defined(PETSC_HAVE_OPENCL)
 99:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102:     } catch (std::exception const &ex) {
103:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104:     }
105:   }

107: #if defined(PETSC_HAVE_OPENCL)
108:   /* ViennaCL OpenCL device type configuration */
109:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110:   if (flg) {
111:     try {
112:       PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

115:       PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

118:       PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120:     } catch (std::exception const &ex) {
121:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122:     }
123:   }
124: #endif

126:   /* Print available backends */
127:   PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128:   if (flg) {
129:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));

143:     /* Print selected backends */
144:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157:   }
158:   PetscFunctionReturn(PETSC_SUCCESS);
159: }

161: /*
162:     Allocates space for the vector array on the Host if it does not exist.
163:     Does NOT change the PetscViennaCLFlag for the vector
164:     Does NOT zero the ViennaCL array
165:  */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168:   PetscScalar *array;
169:   Vec_Seq     *s;
170:   PetscInt     n = v->map->n;

172:   PetscFunctionBegin;
173:   s = (Vec_Seq *)v->data;
174:   PetscCall(VecViennaCLAllocateCheck(v));
175:   if (s->array == 0) {
176:     PetscCall(PetscMalloc1(n, &array));
177:     s->array           = array;
178:     s->array_allocated = array;
179:   }
180:   PetscFunctionReturn(PETSC_SUCCESS);
181: }

183: /*
184:     Allocates space for the vector array on the GPU if it does not exist.
185:     Does NOT change the PetscViennaCLFlag for the vector
186:     Does NOT zero the ViennaCL array

188:  */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191:   PetscFunctionBegin;
192:   if (!v->spptr) {
193:     try {
194:       v->spptr                                       = new Vec_ViennaCL;
195:       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196:       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;

198:     } catch (std::exception const &ex) {
199:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200:     }
201:   }
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }

205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208:   PetscFunctionBegin;
209:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210:   PetscCall(VecViennaCLAllocateCheck(v));
211:   if (v->map->n > 0) {
212:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213:       PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214:       try {
215:         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216:         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217:         ViennaCLWaitForGPU();
218:       } catch (std::exception const &ex) {
219:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220:       }
221:       PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222:       PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223:       v->offloadmask = PETSC_OFFLOAD_BOTH;
224:     }
225:   }
226:   PetscFunctionReturn(PETSC_SUCCESS);
227: }

229: /*
230:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234:   PetscFunctionBegin;
235:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236:   PetscCall(VecViennaCLAllocateCheckHost(v));
237:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238:     PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239:     try {
240:       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241:       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242:       ViennaCLWaitForGPU();
243:     } catch (std::exception const &ex) {
244:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245:     }
246:     PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247:     PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248:     v->offloadmask = PETSC_OFFLOAD_BOTH;
249:   }
250:   PetscFunctionReturn(PETSC_SUCCESS);
251: }

253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256:   PetscScalar       *ya;
257:   const PetscScalar *xa;

259:   PetscFunctionBegin;
260:   PetscCall(VecViennaCLAllocateCheckHost(xin));
261:   PetscCall(VecViennaCLAllocateCheckHost(yin));
262:   if (xin != yin) {
263:     PetscCall(VecGetArrayRead(xin, &xa));
264:     PetscCall(VecGetArray(yin, &ya));
265:     PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266:     PetscCall(VecRestoreArrayRead(xin, &xa));
267:     PetscCall(VecRestoreArray(yin, &ya));
268:   }
269:   PetscFunctionReturn(PETSC_SUCCESS);
270: }

272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274:   PetscInt     n = xin->map->n, i;
275:   PetscScalar *xx;

277:   PetscFunctionBegin;
278:   PetscCall(VecGetArray(xin, &xx));
279:   for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280:   PetscCall(VecRestoreArray(xin, &xx));
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286:   Vec_Seq *vs = (Vec_Seq *)v->data;

288:   PetscFunctionBegin;
289:   PetscCall(PetscObjectSAWsViewOff(v));
290:   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291:   if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
292:   PetscCall(PetscFree(vs));
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297: {
298:   Vec_Seq *v = (Vec_Seq *)vin->data;

300:   PetscFunctionBegin;
301:   v->array         = v->unplacedarray;
302:   v->unplacedarray = 0;
303:   PetscFunctionReturn(PETSC_SUCCESS);
304: }

306: /*MC
307:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

309:    Options Database Keys:
310: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

312:   Level: beginner

314: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315: M*/

317: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318: {
319:   const ViennaCLVector *xgpu;
320:   ViennaCLVector       *ygpu;

322:   PetscFunctionBegin;
323:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324:   PetscCall(VecViennaCLGetArray(yin, &ygpu));
325:   PetscCall(PetscLogGpuTimeBegin());
326:   try {
327:     if (alpha != 0.0 && xin->map->n > 0) {
328:       *ygpu = *xgpu + alpha * *ygpu;
329:       PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330:     } else {
331:       *ygpu = *xgpu;
332:     }
333:     ViennaCLWaitForGPU();
334:   } catch (std::exception const &ex) {
335:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336:   }
337:   PetscCall(PetscLogGpuTimeEnd());
338:   PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339:   PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340:   PetscFunctionReturn(PETSC_SUCCESS);
341: }

343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344: {
345:   const ViennaCLVector *xgpu;
346:   ViennaCLVector       *ygpu;

348:   PetscFunctionBegin;
349:   if (alpha != 0.0 && xin->map->n > 0) {
350:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
352:     PetscCall(PetscLogGpuTimeBegin());
353:     try {
354:       *ygpu += alpha * *xgpu;
355:       ViennaCLWaitForGPU();
356:     } catch (std::exception const &ex) {
357:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358:     }
359:     PetscCall(PetscLogGpuTimeEnd());
360:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362:     PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363:   }
364:   PetscFunctionReturn(PETSC_SUCCESS);
365: }

367: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368: {
369:   const ViennaCLVector *xgpu, *ygpu;
370:   ViennaCLVector       *wgpu;

372:   PetscFunctionBegin;
373:   if (xin->map->n > 0) {
374:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377:     PetscCall(PetscLogGpuTimeBegin());
378:     try {
379:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380:       ViennaCLWaitForGPU();
381:     } catch (std::exception const &ex) {
382:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383:     }
384:     PetscCall(PetscLogGpuTimeEnd());
385:     PetscCall(PetscLogGpuFlops(win->map->n));
386:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389:   }
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }

393: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394: {
395:   const ViennaCLVector *xgpu, *ygpu;
396:   ViennaCLVector       *wgpu;

398:   PetscFunctionBegin;
399:   if (alpha == 0.0 && xin->map->n > 0) {
400:     PetscCall(VecCopy_SeqViennaCL(yin, win));
401:   } else {
402:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
403:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
404:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
405:     PetscCall(PetscLogGpuTimeBegin());
406:     if (alpha == 1.0) {
407:       try {
408:         *wgpu = *ygpu + *xgpu;
409:       } catch (std::exception const &ex) {
410:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
411:       }
412:       PetscCall(PetscLogGpuFlops(win->map->n));
413:     } else if (alpha == -1.0) {
414:       try {
415:         *wgpu = *ygpu - *xgpu;
416:       } catch (std::exception const &ex) {
417:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
418:       }
419:       PetscCall(PetscLogGpuFlops(win->map->n));
420:     } else {
421:       try {
422:         *wgpu = *ygpu + alpha * *xgpu;
423:       } catch (std::exception const &ex) {
424:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
425:       }
426:       PetscCall(PetscLogGpuFlops(2 * win->map->n));
427:     }
428:     ViennaCLWaitForGPU();
429:     PetscCall(PetscLogGpuTimeEnd());
430:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
431:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
432:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
433:   }
434:   PetscFunctionReturn(PETSC_SUCCESS);
435: }

437: /*
438:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
439:  *
440:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
441:  * hence there is an iterated application of these until the final result is obtained
442:  */
443: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
444: {
445:   PetscInt j;

447:   PetscFunctionBegin;
448:   for (j = 0; j < nv; ++j) {
449:     if (j + 1 < nv) {
450:       PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
451:       ++j;
452:     } else {
453:       PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
454:     }
455:   }
456:   ViennaCLWaitForGPU();
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
461: {
462:   const ViennaCLVector *xgpu, *ygpu;

464:   PetscFunctionBegin;
465:   if (xin->map->n > 0) {
466:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
467:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
468:     PetscCall(PetscLogGpuTimeBegin());
469:     try {
470:       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
471:     } catch (std::exception const &ex) {
472:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
473:     }
474:     ViennaCLWaitForGPU();
475:     PetscCall(PetscLogGpuTimeEnd());
476:     if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
477:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
478:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
479:   } else *z = 0.0;
480:   PetscFunctionReturn(PETSC_SUCCESS);
481: }

483: /*
484:  * Operation z[j] = dot(x, y[j])
485:  *
486:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
487:  */
488: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
489: {
490:   PetscInt                                                n = xin->map->n, i;
491:   const ViennaCLVector                                   *xgpu, *ygpu;
492:   Vec                                                    *yyin = (Vec *)yin;
493:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

495:   PetscFunctionBegin;
496:   if (xin->map->n > 0) {
497:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
498:     for (i = 0; i < nv; i++) {
499:       PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
500:       ygpu_array[i] = ygpu;
501:     }
502:     PetscCall(PetscLogGpuTimeBegin());
503:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
504:     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
505:     viennacl::copy(result.begin(), result.end(), z);
506:     for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
507:     ViennaCLWaitForGPU();
508:     PetscCall(PetscLogGpuTimeEnd());
509:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
510:     PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
511:   } else {
512:     for (i = 0; i < nv; i++) z[i] = 0.0;
513:   }
514:   PetscFunctionReturn(PETSC_SUCCESS);
515: }

517: static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
518: {
519:   PetscFunctionBegin;
520:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
521:   PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
522:   ViennaCLWaitForGPU();
523:   PetscFunctionReturn(PETSC_SUCCESS);
524: }

526: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
527: {
528:   ViennaCLVector *xgpu;

530:   PetscFunctionBegin;
531:   if (xin->map->n > 0) {
532:     PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
533:     PetscCall(PetscLogGpuTimeBegin());
534:     try {
535:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
536:       ViennaCLWaitForGPU();
537:     } catch (std::exception const &ex) {
538:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539:     }
540:     PetscCall(PetscLogGpuTimeEnd());
541:     PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
542:   }
543:   PetscFunctionReturn(PETSC_SUCCESS);
544: }

546: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
547: {
548:   ViennaCLVector *xgpu;

550:   PetscFunctionBegin;
551:   if (alpha == 0.0 && xin->map->n > 0) {
552:     PetscCall(VecSet_SeqViennaCL(xin, alpha));
553:     PetscCall(PetscLogGpuFlops(xin->map->n));
554:   } else if (alpha != 1.0 && xin->map->n > 0) {
555:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
556:     PetscCall(PetscLogGpuTimeBegin());
557:     try {
558:       *xgpu *= alpha;
559:       ViennaCLWaitForGPU();
560:     } catch (std::exception const &ex) {
561:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
562:     }
563:     PetscCall(PetscLogGpuTimeEnd());
564:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
565:     PetscCall(PetscLogGpuFlops(xin->map->n));
566:   }
567:   PetscFunctionReturn(PETSC_SUCCESS);
568: }

570: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
571: {
572:   PetscFunctionBegin;
573:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
574:   PetscCall(VecDot_SeqViennaCL(xin, yin, z));
575:   ViennaCLWaitForGPU();
576:   PetscFunctionReturn(PETSC_SUCCESS);
577: }

579: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
580: {
581:   const ViennaCLVector *xgpu;
582:   ViennaCLVector       *ygpu;

584:   PetscFunctionBegin;
585:   if (xin != yin && xin->map->n > 0) {
586:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
587:       PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
588:       PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
589:       PetscCall(PetscLogGpuTimeBegin());
590:       try {
591:         *ygpu = *xgpu;
592:         ViennaCLWaitForGPU();
593:       } catch (std::exception const &ex) {
594:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595:       }
596:       PetscCall(PetscLogGpuTimeEnd());
597:       PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
598:       PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));

600:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
601:       /* copy in CPU if we are on the CPU*/
602:       PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
603:       ViennaCLWaitForGPU();
604:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
605:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
606:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
607:         /* copy in CPU */
608:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
609:         ViennaCLWaitForGPU();
610:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
611:         /* copy in GPU */
612:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
613:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
614:         PetscCall(PetscLogGpuTimeBegin());
615:         try {
616:           *ygpu = *xgpu;
617:           ViennaCLWaitForGPU();
618:         } catch (std::exception const &ex) {
619:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
620:         }
621:         PetscCall(PetscLogGpuTimeEnd());
622:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
623:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
624:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
625:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
626:            default to copy in GPU (this is an arbitrary choice) */
627:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
628:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
629:         PetscCall(PetscLogGpuTimeBegin());
630:         try {
631:           *ygpu = *xgpu;
632:           ViennaCLWaitForGPU();
633:         } catch (std::exception const &ex) {
634:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
635:         }
636:         PetscCall(PetscLogGpuTimeEnd());
637:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
638:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
639:       } else {
640:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
641:         ViennaCLWaitForGPU();
642:       }
643:     }
644:   }
645:   PetscFunctionReturn(PETSC_SUCCESS);
646: }

648: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
649: {
650:   ViennaCLVector *xgpu, *ygpu;

652:   PetscFunctionBegin;
653:   if (xin != yin && xin->map->n > 0) {
654:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
655:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
656:     PetscCall(PetscLogGpuTimeBegin());
657:     try {
658:       viennacl::swap(*xgpu, *ygpu);
659:       ViennaCLWaitForGPU();
660:     } catch (std::exception const &ex) {
661:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
662:     }
663:     PetscCall(PetscLogGpuTimeEnd());
664:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
665:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
666:   }
667:   PetscFunctionReturn(PETSC_SUCCESS);
668: }

670: // y = alpha * x + beta * y
671: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
672: {
673:   PetscScalar           a = alpha, b = beta;
674:   const ViennaCLVector *xgpu;
675:   ViennaCLVector       *ygpu;

677:   PetscFunctionBegin;
678:   if (a == 0.0 && xin->map->n > 0) {
679:     PetscCall(VecScale_SeqViennaCL(yin, beta));
680:   } else if (b == 1.0 && xin->map->n > 0) {
681:     PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
682:   } else if (a == 1.0 && xin->map->n > 0) {
683:     PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
684:   } else if (b == 0.0 && xin->map->n > 0) {
685:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
686:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
687:     PetscCall(PetscLogGpuTimeBegin());
688:     try {
689:       *ygpu = *xgpu * alpha;
690:       ViennaCLWaitForGPU();
691:     } catch (std::exception const &ex) {
692:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
693:     }
694:     PetscCall(PetscLogGpuTimeEnd());
695:     PetscCall(PetscLogGpuFlops(xin->map->n));
696:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
697:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
698:   } else if (xin->map->n > 0) {
699:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
700:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
701:     PetscCall(PetscLogGpuTimeBegin());
702:     try {
703:       *ygpu = *xgpu * alpha + *ygpu * beta;
704:       ViennaCLWaitForGPU();
705:     } catch (std::exception const &ex) {
706:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
707:     }
708:     PetscCall(PetscLogGpuTimeEnd());
709:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
710:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
711:     PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
712:   }
713:   PetscFunctionReturn(PETSC_SUCCESS);
714: }

716: /* operation  z = alpha * x + beta *y + gamma *z*/
717: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
718: {
719:   PetscInt              n = zin->map->n;
720:   const ViennaCLVector *xgpu, *ygpu;
721:   ViennaCLVector       *zgpu;

723:   PetscFunctionBegin;
724:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
725:   PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
726:   PetscCall(VecViennaCLGetArray(zin, &zgpu));
727:   if (alpha == 0.0 && xin->map->n > 0) {
728:     PetscCall(PetscLogGpuTimeBegin());
729:     try {
730:       if (beta == 0.0) {
731:         *zgpu = gamma * *zgpu;
732:         ViennaCLWaitForGPU();
733:         PetscCall(PetscLogGpuFlops(1.0 * n));
734:       } else if (gamma == 0.0) {
735:         *zgpu = beta * *ygpu;
736:         ViennaCLWaitForGPU();
737:         PetscCall(PetscLogGpuFlops(1.0 * n));
738:       } else {
739:         *zgpu = beta * *ygpu + gamma * *zgpu;
740:         ViennaCLWaitForGPU();
741:         PetscCall(PetscLogGpuFlops(3.0 * n));
742:       }
743:     } catch (std::exception const &ex) {
744:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
745:     }
746:     PetscCall(PetscLogGpuTimeEnd());
747:     PetscCall(PetscLogGpuFlops(3.0 * n));
748:   } else if (beta == 0.0 && xin->map->n > 0) {
749:     PetscCall(PetscLogGpuTimeBegin());
750:     try {
751:       if (gamma == 0.0) {
752:         *zgpu = alpha * *xgpu;
753:         ViennaCLWaitForGPU();
754:         PetscCall(PetscLogGpuFlops(1.0 * n));
755:       } else {
756:         *zgpu = alpha * *xgpu + gamma * *zgpu;
757:         ViennaCLWaitForGPU();
758:         PetscCall(PetscLogGpuFlops(3.0 * n));
759:       }
760:     } catch (std::exception const &ex) {
761:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
762:     }
763:     PetscCall(PetscLogGpuTimeEnd());
764:   } else if (gamma == 0.0 && xin->map->n > 0) {
765:     PetscCall(PetscLogGpuTimeBegin());
766:     try {
767:       *zgpu = alpha * *xgpu + beta * *ygpu;
768:       ViennaCLWaitForGPU();
769:     } catch (std::exception const &ex) {
770:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
771:     }
772:     PetscCall(PetscLogGpuTimeEnd());
773:     PetscCall(PetscLogGpuFlops(3.0 * n));
774:   } else if (xin->map->n > 0) {
775:     PetscCall(PetscLogGpuTimeBegin());
776:     try {
777:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
778:       if (gamma != 1.0) *zgpu *= gamma;
779:       *zgpu += alpha * *xgpu + beta * *ygpu;
780:       ViennaCLWaitForGPU();
781:     } catch (std::exception const &ex) {
782:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
783:     }
784:     PetscCall(PetscLogGpuTimeEnd());
785:     PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
786:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
787:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
788:     PetscCall(PetscLogGpuFlops(5.0 * n));
789:   }
790:   PetscFunctionReturn(PETSC_SUCCESS);
791: }

793: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
794: {
795:   PetscInt              n = win->map->n;
796:   const ViennaCLVector *xgpu, *ygpu;
797:   ViennaCLVector       *wgpu;

799:   PetscFunctionBegin;
800:   if (xin->map->n > 0) {
801:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
802:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
803:     PetscCall(VecViennaCLGetArray(win, &wgpu));
804:     PetscCall(PetscLogGpuTimeBegin());
805:     try {
806:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
807:       ViennaCLWaitForGPU();
808:     } catch (std::exception const &ex) {
809:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
810:     }
811:     PetscCall(PetscLogGpuTimeEnd());
812:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
813:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
814:     PetscCall(VecViennaCLRestoreArray(win, &wgpu));
815:     PetscCall(PetscLogGpuFlops(n));
816:   }
817:   PetscFunctionReturn(PETSC_SUCCESS);
818: }

820: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
821: {
822:   PetscInt              n = xin->map->n;
823:   PetscBLASInt          bn;
824:   const ViennaCLVector *xgpu;

826:   PetscFunctionBegin;
827:   if (xin->map->n > 0) {
828:     PetscCall(PetscBLASIntCast(n, &bn));
829:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
830:     if (type == NORM_2 || type == NORM_FROBENIUS) {
831:       PetscCall(PetscLogGpuTimeBegin());
832:       try {
833:         *z = viennacl::linalg::norm_2(*xgpu);
834:         ViennaCLWaitForGPU();
835:       } catch (std::exception const &ex) {
836:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837:       }
838:       PetscCall(PetscLogGpuTimeEnd());
839:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
840:     } else if (type == NORM_INFINITY) {
841:       PetscCall(PetscLogGpuTimeBegin());
842:       try {
843:         *z = viennacl::linalg::norm_inf(*xgpu);
844:         ViennaCLWaitForGPU();
845:       } catch (std::exception const &ex) {
846:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
847:       }
848:       PetscCall(PetscLogGpuTimeEnd());
849:     } else if (type == NORM_1) {
850:       PetscCall(PetscLogGpuTimeBegin());
851:       try {
852:         *z = viennacl::linalg::norm_1(*xgpu);
853:         ViennaCLWaitForGPU();
854:       } catch (std::exception const &ex) {
855:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
856:       }
857:       PetscCall(PetscLogGpuTimeEnd());
858:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
859:     } else if (type == NORM_1_AND_2) {
860:       PetscCall(PetscLogGpuTimeBegin());
861:       try {
862:         *z       = viennacl::linalg::norm_1(*xgpu);
863:         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
864:         ViennaCLWaitForGPU();
865:       } catch (std::exception const &ex) {
866:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
867:       }
868:       PetscCall(PetscLogGpuTimeEnd());
869:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
870:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
871:     }
872:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
873:   } else if (type == NORM_1_AND_2) {
874:     *z       = 0.0;
875:     *(z + 1) = 0.0;
876:   } else *z = 0.0;
877:   PetscFunctionReturn(PETSC_SUCCESS);
878: }

880: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
881: {
882:   PetscFunctionBegin;
883:   PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
884:   xin->offloadmask = PETSC_OFFLOAD_CPU;
885:   PetscFunctionReturn(PETSC_SUCCESS);
886: }

888: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
889: {
890:   PetscFunctionBegin;
891:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
892:   PetscCall(VecViennaCLCopyFromGPU(vin));
893:   PetscCall(VecResetArray_SeqViennaCL_Private(vin));
894:   vin->offloadmask = PETSC_OFFLOAD_CPU;
895:   PetscFunctionReturn(PETSC_SUCCESS);
896: }

898: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
899: {
900:   PetscFunctionBegin;
901:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
902:   PetscCall(VecViennaCLCopyFromGPU(vin));
903:   PetscCall(VecPlaceArray_Seq(vin, a));
904:   vin->offloadmask = PETSC_OFFLOAD_CPU;
905:   PetscFunctionReturn(PETSC_SUCCESS);
906: }

908: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
909: {
910:   PetscFunctionBegin;
911:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
912:   PetscCall(VecViennaCLCopyFromGPU(vin));
913:   PetscCall(VecReplaceArray_Seq(vin, a));
914:   vin->offloadmask = PETSC_OFFLOAD_CPU;
915:   PetscFunctionReturn(PETSC_SUCCESS);
916: }

918: /*@C
919:   VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

921:   Collective

923:   Input Parameters:
924: + comm - the communicator, should be PETSC_COMM_SELF
925: - n    - the vector length

927:   Output Parameter:
928: . v - the vector

930:   Notes:
931:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
932:   same type as an existing vector.

934:   Level: intermediate

936: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
937: @*/
938: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
939: {
940:   PetscFunctionBegin;
941:   PetscCall(VecCreate(comm, v));
942:   PetscCall(VecSetSizes(*v, n, n));
943:   PetscCall(VecSetType(*v, VECSEQVIENNACL));
944:   PetscFunctionReturn(PETSC_SUCCESS);
945: }

947: /*@C
948:   VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
949:   where the user provides the array space to store the vector values.

951:   Collective

953:   Input Parameters:
954: + comm  - the communicator, should be PETSC_COMM_SELF
955: . bs    - the block size
956: . n     - the vector length
957: - array - viennacl array where the vector elements are to be stored.

959:   Output Parameter:
960: . V - the vector

962:   Notes:
963:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
964:   same type as an existing vector.

966:   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
967:   at a later stage to SET the array for storing the vector values.

969:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
970:   The user should not free the array until the vector is destroyed.

972:   Level: intermediate

974: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
975:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
976:           `VecCreateMPIWithArray()`
977: @*/
978: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
979: {
980:   PetscMPIInt size;

982:   PetscFunctionBegin;
983:   PetscCall(VecCreate(comm, V));
984:   PetscCall(VecSetSizes(*V, n, n));
985:   PetscCall(VecSetBlockSize(*V, bs));
986:   PetscCallMPI(MPI_Comm_size(comm, &size));
987:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
988:   PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
989:   PetscFunctionReturn(PETSC_SUCCESS);
990: }

992: /*@C
993:   VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
994:   the user provides the array space to store the vector values.

996:   Collective

998:   Input Parameters:
999: + comm        - the communicator, should be PETSC_COMM_SELF
1000: . bs          - the block size
1001: . n           - the vector length
1002: . cpuarray    - CPU memory where the vector elements are to be stored.
1003: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

1005:   Output Parameter:
1006: . V - the vector

1008:   Notes:
1009:   If both cpuarray and viennaclvec are provided, the caller must ensure that
1010:   the provided arrays have identical values.

1012:   PETSc does NOT free the provided arrays when the vector is destroyed via
1013:   VecDestroy(). The user should not free the array until the vector is
1014:   destroyed.

1016:   Level: intermediate

1018: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1019:           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1020:           `VecViennaCLAllocateCheckHost()`
1021: @*/
1022: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
1023: {
1024:   PetscMPIInt size;

1026:   PetscFunctionBegin;
1027:   PetscCallMPI(MPI_Comm_size(comm, &size));
1028:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");

1030:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1031:   PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));

1033:   if (cpuarray && viennaclvec) {
1034:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1035:     s->array          = (PetscScalar *)cpuarray;
1036:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1037:   } else if (cpuarray) {
1038:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1039:     s->array          = (PetscScalar *)cpuarray;
1040:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1041:   } else if (viennaclvec) {
1042:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1043:   } else {
1044:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1045:   }
1046:   PetscFunctionReturn(PETSC_SUCCESS);
1047: }

1049: /*@C
1050:   VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1051:   the one provided by the user. This is useful to avoid a copy.

1053:   Not Collective

1055:   Input Parameters:
1056: + vin - the vector
1057: - a   - the ViennaCL vector

1059:   Notes:
1060:   You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1061:   It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1062:   the same vector.

1064:   Level: intermediate

1066: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1067:           `VecCUDAPlaceArray()`,

1069: @*/
1070: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1071: {
1072:   PetscFunctionBegin;
1073:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1074:   PetscCall(VecViennaCLCopyToGPU(vin));
1075:   PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1076:   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1077:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1078:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1079:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1080:   PetscFunctionReturn(PETSC_SUCCESS);
1081: }

1083: /*@C
1084:   VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1085:   after the use of VecViennaCLPlaceArray().

1087:   Not Collective

1089:   Input Parameters:
1090: . vin - the vector

1092:   Level: developer

1094: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1095: @*/
1096: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1097: {
1098:   PetscFunctionBegin;
1099:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1100:   PetscCall(VecViennaCLCopyToGPU(vin));
1101:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1102:   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1103:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1104:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1105:   PetscFunctionReturn(PETSC_SUCCESS);
1106: }

1108: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1109:  *
1110:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1111:  */
1112: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1113: {
1114:   PetscFunctionBegin;
1115:   PetscCall(VecDot_SeqViennaCL(s, t, dp));
1116:   PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1117:   *nm *= *nm; //squared norm required
1118:   PetscFunctionReturn(PETSC_SUCCESS);
1119: }

1121: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1122: {
1123:   PetscFunctionBegin;
1124:   PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1125:   PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1126:   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1127:   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1128:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1129:   PetscFunctionReturn(PETSC_SUCCESS);
1130: }

1132: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1133: {
1134:   PetscFunctionBegin;
1135:   try {
1136:     if (v->spptr) {
1137:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1138:       delete (Vec_ViennaCL *)v->spptr;
1139:     }
1140:   } catch (char *ex) {
1141:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1142:   }
1143:   PetscCall(VecDestroy_SeqViennaCL_Private(v));
1144:   PetscFunctionReturn(PETSC_SUCCESS);
1145: }

1147: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1148: {
1149:   PetscFunctionBegin;
1150:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1151:     PetscCall(VecViennaCLCopyFromGPU(v));
1152:   } else {
1153:     PetscCall(VecViennaCLAllocateCheckHost(v));
1154:   }
1155:   *a = *((PetscScalar **)v->data);
1156:   PetscFunctionReturn(PETSC_SUCCESS);
1157: }

1159: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1160: {
1161:   PetscFunctionBegin;
1162:   v->offloadmask = PETSC_OFFLOAD_CPU;
1163:   PetscFunctionReturn(PETSC_SUCCESS);
1164: }

1166: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1167: {
1168:   PetscFunctionBegin;
1169:   PetscCall(VecViennaCLAllocateCheckHost(v));
1170:   *a = *((PetscScalar **)v->data);
1171:   PetscFunctionReturn(PETSC_SUCCESS);
1172: }

1174: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1175: {
1176:   PetscFunctionBegin;
1177:   V->boundtocpu = flg;
1178:   if (flg) {
1179:     PetscCall(VecViennaCLCopyFromGPU(V));
1180:     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1181:     V->ops->dot             = VecDot_Seq;
1182:     V->ops->norm            = VecNorm_Seq;
1183:     V->ops->tdot            = VecTDot_Seq;
1184:     V->ops->scale           = VecScale_Seq;
1185:     V->ops->copy            = VecCopy_Seq;
1186:     V->ops->set             = VecSet_Seq;
1187:     V->ops->swap            = VecSwap_Seq;
1188:     V->ops->axpy            = VecAXPY_Seq;
1189:     V->ops->axpby           = VecAXPBY_Seq;
1190:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1191:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1192:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1193:     V->ops->setrandom       = VecSetRandom_Seq;
1194:     V->ops->dot_local       = VecDot_Seq;
1195:     V->ops->tdot_local      = VecTDot_Seq;
1196:     V->ops->norm_local      = VecNorm_Seq;
1197:     V->ops->mdot_local      = VecMDot_Seq;
1198:     V->ops->mtdot_local     = VecMTDot_Seq;
1199:     V->ops->maxpy           = VecMAXPY_Seq;
1200:     V->ops->mdot            = VecMDot_Seq;
1201:     V->ops->mtdot           = VecMTDot_Seq;
1202:     V->ops->aypx            = VecAYPX_Seq;
1203:     V->ops->waxpy           = VecWAXPY_Seq;
1204:     V->ops->dotnorm2        = NULL;
1205:     V->ops->placearray      = VecPlaceArray_Seq;
1206:     V->ops->replacearray    = VecReplaceArray_Seq;
1207:     V->ops->resetarray      = VecResetArray_Seq;
1208:     V->ops->duplicate       = VecDuplicate_Seq;
1209:   } else {
1210:     V->ops->dot             = VecDot_SeqViennaCL;
1211:     V->ops->norm            = VecNorm_SeqViennaCL;
1212:     V->ops->tdot            = VecTDot_SeqViennaCL;
1213:     V->ops->scale           = VecScale_SeqViennaCL;
1214:     V->ops->copy            = VecCopy_SeqViennaCL;
1215:     V->ops->set             = VecSet_SeqViennaCL;
1216:     V->ops->swap            = VecSwap_SeqViennaCL;
1217:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1218:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1219:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1220:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1221:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1222:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1223:     V->ops->dot_local       = VecDot_SeqViennaCL;
1224:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1225:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1226:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1227:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1228:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1229:     V->ops->mdot            = VecMDot_SeqViennaCL;
1230:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1231:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1232:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1233:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1234:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1235:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1236:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1237:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1238:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1239:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1240:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1241:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1242:   }
1243:   V->ops->duplicatevecs = VecDuplicateVecs_Default;
1244:   PetscFunctionReturn(PETSC_SUCCESS);
1245: }

1247: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1248: {
1249:   PetscMPIInt size;

1251:   PetscFunctionBegin;
1252:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1253:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1254:   PetscCall(VecCreate_Seq_Private(V, 0));
1255:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));

1257:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1258:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1260:   PetscCall(VecViennaCLAllocateCheck(V));
1261:   PetscCall(VecSet_SeqViennaCL(V, 0.0));
1262:   PetscFunctionReturn(PETSC_SUCCESS);
1263: }

1265: /*@C
1266:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1268:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1269:   invoking clReleaseContext().

1271:   Input Parameter:
1272: . v - the vector

1274:   Output Parameter:
1275: . ctx - pointer to the underlying CL context

1277:   Level: intermediate

1279: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1280: @*/
1281: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1282: {
1283: #if !defined(PETSC_HAVE_OPENCL)
1284:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1285: #else

1287:   PetscFunctionBegin;
1288:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1289:   const ViennaCLVector *v_vcl;
1290:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1291:   try {
1292:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1293:     const cl_context       ocl_ctx = vcl_ctx.handle().get();
1294:     clRetainContext(ocl_ctx);
1295:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1296:   } catch (std::exception const &ex) {
1297:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1298:   }
1299:   PetscFunctionReturn(PETSC_SUCCESS);
1300: #endif
1301: }

1303: /*@C
1304:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1305:   operations of the Vec are enqueued.

1307:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1308:   responsible for invoking clReleaseCommandQueue().

1310:   Input Parameter:
1311: . v - the vector

1313:   Output Parameter:
1314: . queue - pointer to the CL command queue

1316:   Level: intermediate

1318: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1319: @*/
1320: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1321: {
1322: #if !defined(PETSC_HAVE_OPENCL)
1323:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1324: #else
1325:   PetscFunctionBegin;
1326:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1327:   const ViennaCLVector *v_vcl;
1328:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1329:   try {
1330:     viennacl::ocl::context              vcl_ctx   = v_vcl->handle().opencl_handle().context();
1331:     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1332:     const cl_command_queue              ocl_queue = vcl_queue.handle().get();
1333:     clRetainCommandQueue(ocl_queue);
1334:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1335:   } catch (std::exception const &ex) {
1336:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1337:   }
1338:   PetscFunctionReturn(PETSC_SUCCESS);
1339: #endif
1340: }

1342: /*@C
1343:   VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.

1345:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1346:   invoking clReleaseMemObject().

1348:   Input Parameter:
1349: . v - the vector

1351:   Output Parameter:
1352: . mem - pointer to the device buffer

1354:   Level: intermediate

1356: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1357: @*/
1358: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1359: {
1360: #if !defined(PETSC_HAVE_OPENCL)
1361:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1362: #else
1363:   PetscFunctionBegin;
1364:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1365:   const ViennaCLVector *v_vcl;
1366:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1367:   try {
1368:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1369:     clRetainMemObject(ocl_mem);
1370:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1371:   } catch (std::exception const &ex) {
1372:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1373:   }
1374:   PetscFunctionReturn(PETSC_SUCCESS);
1375: #endif
1376: }

1378: /*@C
1379:   VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.

1381:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1382:   invoking clReleaseMemObject().

1384:   The device pointer has to be released by calling
1385:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1386:   the host will be marked as out of date.  A subsequent access of the host data
1387:   will thus incur a data transfer from the device to the host.

1389:   Input Parameter:
1390: . v - the vector

1392:   Output Parameter:
1393: . mem - pointer to the device buffer

1395:   Level: intermediate

1397: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1398: @*/
1399: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1400: {
1401: #if !defined(PETSC_HAVE_OPENCL)
1402:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1403: #else
1404:   PetscFunctionBegin;
1405:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1406:   ViennaCLVector *v_vcl;
1407:   PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1408:   try {
1409:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1410:     clRetainMemObject(ocl_mem);
1411:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1412:   } catch (std::exception const &ex) {
1413:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1414:   }
1415:   PetscFunctionReturn(PETSC_SUCCESS);
1416: #endif
1417: }

1419: /*@C
1420:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1421:   acquired with VecViennaCLGetCLMemWrite().

1423:   This marks the host data as out of date.  Subsequent access to the
1424:   vector data on the host side with for instance VecGetArray() incurs a
1425:   data transfer.

1427:   Input Parameter:
1428: . v - the vector

1430:   Level: intermediate

1432: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1433: @*/
1434: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1435: {
1436: #if !defined(PETSC_HAVE_OPENCL)
1437:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1438: #else
1439:   PetscFunctionBegin;
1440:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1441:   PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1442:   PetscFunctionReturn(PETSC_SUCCESS);
1443: #endif
1444: }

1446: /*@C
1447:   VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.

1449:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1450:   invoking clReleaseMemObject().

1452:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1453:   Upon restoring the vector data the data on the host will be marked as out of
1454:   date.  A subsequent access of the host data will thus incur a data transfer
1455:   from the device to the host.

1457:   Input Parameter:
1458: . v - the vector

1460:   Output Parameter:
1461: . mem - pointer to the device buffer

1463:   Level: intermediate

1465: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1466: @*/
1467: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1468: {
1469: #if !defined(PETSC_HAVE_OPENCL)
1470:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1471: #else
1472:   PetscFunctionBegin;
1473:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1474:   ViennaCLVector *v_vcl;
1475:   PetscCall(VecViennaCLGetArray(v, &v_vcl));
1476:   try {
1477:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1478:     clRetainMemObject(ocl_mem);
1479:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1480:   } catch (std::exception const &ex) {
1481:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1482:   }
1483:   PetscFunctionReturn(PETSC_SUCCESS);
1484: #endif
1485: }

1487: /*@C
1488:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1489:   acquired with VecViennaCLGetCLMem().

1491:   This marks the host data as out of date. Subsequent access to the vector
1492:   data on the host side with for instance VecGetArray() incurs a data
1493:   transfer.

1495:   Input Parameter:
1496: . v - the vector

1498:   Level: intermediate

1500: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1501: @*/
1502: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1503: {
1504: #if !defined(PETSC_HAVE_OPENCL)
1505:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1506: #else
1507:   PetscFunctionBegin;
1508:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1509:   PetscCall(VecViennaCLRestoreArray(v, NULL));
1510:   PetscFunctionReturn(PETSC_SUCCESS);
1511: #endif
1512: }

1514: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1515: {
1516:   Vec_ViennaCL *vecviennacl;
1517:   PetscMPIInt   size;

1519:   PetscFunctionBegin;
1520:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1521:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1522:   PetscCall(VecCreate_Seq_Private(V, 0));
1523:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1524:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1525:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1527:   if (array) {
1528:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1529:     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1530:     vecviennacl->GPUarray_allocated = 0;
1531:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1532:     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1533:   }
1534:   PetscFunctionReturn(PETSC_SUCCESS);
1535: }