Actual source code: vecviennacl.cxx

  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16:   #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 22:   *a = 0;
 23:   VecViennaCLCopyToGPU(v);
 24:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 25:   ViennaCLWaitForGPU();
 26:   return 0;
 27: }

 29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 30: {
 32:   v->offloadmask = PETSC_OFFLOAD_GPU;

 34:   PetscObjectStateIncrease((PetscObject)v);
 35:   return 0;
 36: }

 38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 39: {
 41:   *a = 0;
 42:   VecViennaCLCopyToGPU(v);
 43:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 44:   ViennaCLWaitForGPU();
 45:   return 0;
 46: }

 48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 49: {
 51:   return 0;
 52: }

 54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 55: {
 57:   *a = 0;
 58:   VecViennaCLAllocateCheck(v);
 59:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 60:   ViennaCLWaitForGPU();
 61:   return 0;
 62: }

 64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 65: {
 67:   v->offloadmask = PETSC_OFFLOAD_GPU;

 69:   PetscObjectStateIncrease((PetscObject)v);
 70:   return 0;
 71: }

 73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 74: {
 75:   char      string[20];
 76:   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;

 78:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 79:   PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg);
 80:   if (flg) {
 81:     try {
 82:       PetscStrcasecmp(string, "cuda", &flg_cuda);
 83:       PetscStrcasecmp(string, "opencl", &flg_opencl);
 84:       PetscStrcasecmp(string, "openmp", &flg_openmp);

 86:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 87:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 88: #if defined(PETSC_HAVE_CUDA)
 89:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 90: #endif
 91: #if defined(PETSC_HAVE_OPENCL)
 92:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
 93: #endif
 94:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
 95:     } catch (std::exception const &ex) {
 96:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
 97:     }
 98:   }

100: #if defined(PETSC_HAVE_OPENCL)
101:   /* ViennaCL OpenCL device type configuration */
102:   PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg);
103:   if (flg) {
104:     try {
105:       PetscStrcasecmp(string, "cpu", &flg);
106:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

108:       PetscStrcasecmp(string, "gpu", &flg);
109:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

111:       PetscStrcasecmp(string, "accelerator", &flg);
112:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
113:     } catch (std::exception const &ex) {
114:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
115:     }
116:   }
117: #endif

119:   /* Print available backends */
120:   PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg);
121:   if (flg) {
122:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
123: #if defined(PETSC_HAVE_CUDA)
124:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
125: #endif
126: #if defined(PETSC_HAVE_OPENCL)
127:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
128: #endif
129: #if defined(PETSC_HAVE_OPENMP)
130:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
131: #else
132:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
133: #endif
134:     PetscPrintf(PETSC_COMM_WORLD, "\n");

136:     /* Print selected backends */
137:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
138: #if defined(PETSC_HAVE_CUDA)
139:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
140: #endif
141: #if defined(PETSC_HAVE_OPENCL)
142:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
143: #endif
144: #if defined(PETSC_HAVE_OPENMP)
145:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
146: #else
147:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
148: #endif
149:     PetscPrintf(PETSC_COMM_WORLD, "\n");
150:   }
151:   return 0;
152: }

154: /*
155:     Allocates space for the vector array on the Host if it does not exist.
156:     Does NOT change the PetscViennaCLFlag for the vector
157:     Does NOT zero the ViennaCL array
158:  */
159: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
160: {
161:   PetscScalar *array;
162:   Vec_Seq     *s;
163:   PetscInt     n = v->map->n;

165:   s = (Vec_Seq *)v->data;
166:   VecViennaCLAllocateCheck(v);
167:   if (s->array == 0) {
168:     PetscMalloc1(n, &array);
169:     s->array           = array;
170:     s->array_allocated = array;
171:   }
172:   return 0;
173: }

175: /*
176:     Allocates space for the vector array on the GPU if it does not exist.
177:     Does NOT change the PetscViennaCLFlag for the vector
178:     Does NOT zero the ViennaCL array

180:  */
181: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
182: {
183:   if (!v->spptr) {
184:     try {
185:       v->spptr                                       = new Vec_ViennaCL;
186:       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
187:       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;

189:     } catch (std::exception const &ex) {
190:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
191:     }
192:   }
193:   return 0;
194: }

196: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
197: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
198: {
200:   VecViennaCLAllocateCheck(v);
201:   if (v->map->n > 0) {
202:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
203:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
204:       try {
205:         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
206:         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
207:         ViennaCLWaitForGPU();
208:       } catch (std::exception const &ex) {
209:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
210:       }
211:       PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar));
212:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
213:       v->offloadmask = PETSC_OFFLOAD_BOTH;
214:     }
215:   }
216:   return 0;
217: }

219: /*
220:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
221: */
222: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
223: {
225:   VecViennaCLAllocateCheckHost(v);
226:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
227:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
228:     try {
229:       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
230:       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
231:       ViennaCLWaitForGPU();
232:     } catch (std::exception const &ex) {
233:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
234:     }
235:     PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar));
236:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
237:     v->offloadmask = PETSC_OFFLOAD_BOTH;
238:   }
239:   return 0;
240: }

242: /* Copy on CPU */
243: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
244: {
245:   PetscScalar       *ya;
246:   const PetscScalar *xa;

248:   VecViennaCLAllocateCheckHost(xin);
249:   VecViennaCLAllocateCheckHost(yin);
250:   if (xin != yin) {
251:     VecGetArrayRead(xin, &xa);
252:     VecGetArray(yin, &ya);
253:     PetscArraycpy(ya, xa, xin->map->n);
254:     VecRestoreArrayRead(xin, &xa);
255:     VecRestoreArray(yin, &ya);
256:   }
257:   return 0;
258: }

260: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
261: {
262:   PetscInt     n = xin->map->n, i;
263:   PetscScalar *xx;

265:   VecGetArray(xin, &xx);
266:   for (i = 0; i < n; i++) PetscRandomGetValue(r, &xx[i]);
267:   VecRestoreArray(xin, &xx);
268:   return 0;
269: }

271: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
272: {
273:   Vec_Seq *vs = (Vec_Seq *)v->data;

275:   PetscObjectSAWsViewOff(v);
276: #if defined(PETSC_USE_LOG)
277:   PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
278: #endif
279:   if (vs->array_allocated) PetscFree(vs->array_allocated);
280:   PetscFree(vs);
281:   return 0;
282: }

284: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
285: {
286:   Vec_Seq *v = (Vec_Seq *)vin->data;

288:   v->array         = v->unplacedarray;
289:   v->unplacedarray = 0;
290:   return 0;
291: }

293: /*MC
294:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

296:    Options Database Keys:
297: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

299:   Level: beginner

301: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
302: M*/

304: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
305: {
306:   const ViennaCLVector *xgpu;
307:   ViennaCLVector       *ygpu;

309:   VecViennaCLGetArrayRead(xin, &xgpu);
310:   VecViennaCLGetArray(yin, &ygpu);
311:   PetscLogGpuTimeBegin();
312:   try {
313:     if (alpha != 0.0 && xin->map->n > 0) {
314:       *ygpu = *xgpu + alpha * *ygpu;
315:       PetscLogGpuFlops(2.0 * yin->map->n);
316:     } else {
317:       *ygpu = *xgpu;
318:     }
319:     ViennaCLWaitForGPU();
320:   } catch (std::exception const &ex) {
321:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
322:   }
323:   PetscLogGpuTimeEnd();
324:   VecViennaCLRestoreArrayRead(xin, &xgpu);
325:   VecViennaCLRestoreArray(yin, &ygpu);
326:   return 0;
327: }

329: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
330: {
331:   const ViennaCLVector *xgpu;
332:   ViennaCLVector       *ygpu;

334:   if (alpha != 0.0 && xin->map->n > 0) {
335:     VecViennaCLGetArrayRead(xin, &xgpu);
336:     VecViennaCLGetArray(yin, &ygpu);
337:     PetscLogGpuTimeBegin();
338:     try {
339:       *ygpu += alpha * *xgpu;
340:       ViennaCLWaitForGPU();
341:     } catch (std::exception const &ex) {
342:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
343:     }
344:     PetscLogGpuTimeEnd();
345:     VecViennaCLRestoreArrayRead(xin, &xgpu);
346:     VecViennaCLRestoreArray(yin, &ygpu);
347:     PetscLogGpuFlops(2.0 * yin->map->n);
348:   }
349:   return 0;
350: }

352: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
353: {
354:   const ViennaCLVector *xgpu, *ygpu;
355:   ViennaCLVector       *wgpu;

357:   if (xin->map->n > 0) {
358:     VecViennaCLGetArrayRead(xin, &xgpu);
359:     VecViennaCLGetArrayRead(yin, &ygpu);
360:     VecViennaCLGetArrayWrite(win, &wgpu);
361:     PetscLogGpuTimeBegin();
362:     try {
363:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
364:       ViennaCLWaitForGPU();
365:     } catch (std::exception const &ex) {
366:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
367:     }
368:     PetscLogGpuTimeEnd();
369:     PetscLogGpuFlops(win->map->n);
370:     VecViennaCLRestoreArrayRead(xin, &xgpu);
371:     VecViennaCLRestoreArrayRead(yin, &ygpu);
372:     VecViennaCLRestoreArrayWrite(win, &wgpu);
373:   }
374:   return 0;
375: }

377: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
378: {
379:   const ViennaCLVector *xgpu, *ygpu;
380:   ViennaCLVector       *wgpu;

382:   if (alpha == 0.0 && xin->map->n > 0) {
383:     VecCopy_SeqViennaCL(yin, win);
384:   } else {
385:     VecViennaCLGetArrayRead(xin, &xgpu);
386:     VecViennaCLGetArrayRead(yin, &ygpu);
387:     VecViennaCLGetArrayWrite(win, &wgpu);
388:     PetscLogGpuTimeBegin();
389:     if (alpha == 1.0) {
390:       try {
391:         *wgpu = *ygpu + *xgpu;
392:       } catch (std::exception const &ex) {
393:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
394:       }
395:       PetscLogGpuFlops(win->map->n);
396:     } else if (alpha == -1.0) {
397:       try {
398:         *wgpu = *ygpu - *xgpu;
399:       } catch (std::exception const &ex) {
400:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
401:       }
402:       PetscLogGpuFlops(win->map->n);
403:     } else {
404:       try {
405:         *wgpu = *ygpu + alpha * *xgpu;
406:       } catch (std::exception const &ex) {
407:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
408:       }
409:       PetscLogGpuFlops(2 * win->map->n);
410:     }
411:     ViennaCLWaitForGPU();
412:     PetscLogGpuTimeEnd();
413:     VecViennaCLRestoreArrayRead(xin, &xgpu);
414:     VecViennaCLRestoreArrayRead(yin, &ygpu);
415:     VecViennaCLRestoreArrayWrite(win, &wgpu);
416:   }
417:   return 0;
418: }

420: /*
421:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
422:  *
423:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
424:  * hence there is an iterated application of these until the final result is obtained
425:  */
426: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
427: {
428:   PetscInt j;

430:   for (j = 0; j < nv; ++j) {
431:     if (j + 1 < nv) {
432:       VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]);
433:       ++j;
434:     } else {
435:       VecAXPY_SeqViennaCL(xin, alpha[j], y[j]);
436:     }
437:   }
438:   ViennaCLWaitForGPU();
439:   return 0;
440: }

442: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
443: {
444:   const ViennaCLVector *xgpu, *ygpu;

446:   if (xin->map->n > 0) {
447:     VecViennaCLGetArrayRead(xin, &xgpu);
448:     VecViennaCLGetArrayRead(yin, &ygpu);
449:     PetscLogGpuTimeBegin();
450:     try {
451:       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
452:     } catch (std::exception const &ex) {
453:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
454:     }
455:     ViennaCLWaitForGPU();
456:     PetscLogGpuTimeEnd();
457:     if (xin->map->n > 0) PetscLogGpuFlops(2.0 * xin->map->n - 1);
458:     VecViennaCLRestoreArrayRead(xin, &xgpu);
459:     VecViennaCLRestoreArrayRead(yin, &ygpu);
460:   } else *z = 0.0;
461:   return 0;
462: }

464: /*
465:  * Operation z[j] = dot(x, y[j])
466:  *
467:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
468:  */
469: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
470: {
471:   PetscInt                                                n = xin->map->n, i;
472:   const ViennaCLVector                                   *xgpu, *ygpu;
473:   Vec                                                    *yyin = (Vec *)yin;
474:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

476:   if (xin->map->n > 0) {
477:     VecViennaCLGetArrayRead(xin, &xgpu);
478:     for (i = 0; i < nv; i++) {
479:       VecViennaCLGetArrayRead(yyin[i], &ygpu);
480:       ygpu_array[i] = ygpu;
481:     }
482:     PetscLogGpuTimeBegin();
483:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
484:     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
485:     viennacl::copy(result.begin(), result.end(), z);
486:     for (i = 0; i < nv; i++) VecViennaCLRestoreArrayRead(yyin[i], &ygpu);
487:     ViennaCLWaitForGPU();
488:     PetscLogGpuTimeEnd();
489:     VecViennaCLRestoreArrayRead(xin, &xgpu);
490:     PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0));
491:   } else {
492:     for (i = 0; i < nv; i++) z[i] = 0.0;
493:   }
494:   return 0;
495: }

497: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
498: {
499:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
500:   VecMDot_SeqViennaCL(xin, nv, yin, z);
501:   ViennaCLWaitForGPU();
502:   return 0;
503: }

505: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
506: {
507:   ViennaCLVector *xgpu;

509:   if (xin->map->n > 0) {
510:     VecViennaCLGetArrayWrite(xin, &xgpu);
511:     PetscLogGpuTimeBegin();
512:     try {
513:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
514:       ViennaCLWaitForGPU();
515:     } catch (std::exception const &ex) {
516:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
517:     }
518:     PetscLogGpuTimeEnd();
519:     VecViennaCLRestoreArrayWrite(xin, &xgpu);
520:   }
521:   return 0;
522: }

524: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
525: {
526:   ViennaCLVector *xgpu;

528:   if (alpha == 0.0 && xin->map->n > 0) {
529:     VecSet_SeqViennaCL(xin, alpha);
530:     PetscLogGpuFlops(xin->map->n);
531:   } else if (alpha != 1.0 && xin->map->n > 0) {
532:     VecViennaCLGetArray(xin, &xgpu);
533:     PetscLogGpuTimeBegin();
534:     try {
535:       *xgpu *= alpha;
536:       ViennaCLWaitForGPU();
537:     } catch (std::exception const &ex) {
538:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539:     }
540:     PetscLogGpuTimeEnd();
541:     VecViennaCLRestoreArray(xin, &xgpu);
542:     PetscLogGpuFlops(xin->map->n);
543:   }
544:   return 0;
545: }

547: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
548: {
549:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
550:   VecDot_SeqViennaCL(xin, yin, z);
551:   ViennaCLWaitForGPU();
552:   return 0;
553: }

555: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
556: {
557:   const ViennaCLVector *xgpu;
558:   ViennaCLVector       *ygpu;

560:   if (xin != yin && xin->map->n > 0) {
561:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
562:       VecViennaCLGetArrayRead(xin, &xgpu);
563:       VecViennaCLGetArrayWrite(yin, &ygpu);
564:       PetscLogGpuTimeBegin();
565:       try {
566:         *ygpu = *xgpu;
567:         ViennaCLWaitForGPU();
568:       } catch (std::exception const &ex) {
569:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
570:       }
571:       PetscLogGpuTimeEnd();
572:       VecViennaCLRestoreArrayRead(xin, &xgpu);
573:       VecViennaCLRestoreArrayWrite(yin, &ygpu);

575:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
576:       /* copy in CPU if we are on the CPU*/
577:       VecCopy_SeqViennaCL_Private(xin, yin);
578:       ViennaCLWaitForGPU();
579:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
580:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
581:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
582:         /* copy in CPU */
583:         VecCopy_SeqViennaCL_Private(xin, yin);
584:         ViennaCLWaitForGPU();
585:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
586:         /* copy in GPU */
587:         VecViennaCLGetArrayRead(xin, &xgpu);
588:         VecViennaCLGetArrayWrite(yin, &ygpu);
589:         PetscLogGpuTimeBegin();
590:         try {
591:           *ygpu = *xgpu;
592:           ViennaCLWaitForGPU();
593:         } catch (std::exception const &ex) {
594:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595:         }
596:         PetscLogGpuTimeEnd();
597:         VecViennaCLRestoreArrayRead(xin, &xgpu);
598:         VecViennaCLRestoreArrayWrite(yin, &ygpu);
599:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
600:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
601:            default to copy in GPU (this is an arbitrary choice) */
602:         VecViennaCLGetArrayRead(xin, &xgpu);
603:         VecViennaCLGetArrayWrite(yin, &ygpu);
604:         PetscLogGpuTimeBegin();
605:         try {
606:           *ygpu = *xgpu;
607:           ViennaCLWaitForGPU();
608:         } catch (std::exception const &ex) {
609:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
610:         }
611:         PetscLogGpuTimeEnd();
612:         VecViennaCLRestoreArrayRead(xin, &xgpu);
613:         VecViennaCLRestoreArrayWrite(yin, &ygpu);
614:       } else {
615:         VecCopy_SeqViennaCL_Private(xin, yin);
616:         ViennaCLWaitForGPU();
617:       }
618:     }
619:   }
620:   return 0;
621: }

623: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
624: {
625:   ViennaCLVector *xgpu, *ygpu;

627:   if (xin != yin && xin->map->n > 0) {
628:     VecViennaCLGetArray(xin, &xgpu);
629:     VecViennaCLGetArray(yin, &ygpu);
630:     PetscLogGpuTimeBegin();
631:     try {
632:       viennacl::swap(*xgpu, *ygpu);
633:       ViennaCLWaitForGPU();
634:     } catch (std::exception const &ex) {
635:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
636:     }
637:     PetscLogGpuTimeEnd();
638:     VecViennaCLRestoreArray(xin, &xgpu);
639:     VecViennaCLRestoreArray(yin, &ygpu);
640:   }
641:   return 0;
642: }

644: // y = alpha * x + beta * y
645: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
646: {
647:   PetscScalar           a = alpha, b = beta;
648:   const ViennaCLVector *xgpu;
649:   ViennaCLVector       *ygpu;

651:   if (a == 0.0 && xin->map->n > 0) {
652:     VecScale_SeqViennaCL(yin, beta);
653:   } else if (b == 1.0 && xin->map->n > 0) {
654:     VecAXPY_SeqViennaCL(yin, alpha, xin);
655:   } else if (a == 1.0 && xin->map->n > 0) {
656:     VecAYPX_SeqViennaCL(yin, beta, xin);
657:   } else if (b == 0.0 && xin->map->n > 0) {
658:     VecViennaCLGetArrayRead(xin, &xgpu);
659:     VecViennaCLGetArray(yin, &ygpu);
660:     PetscLogGpuTimeBegin();
661:     try {
662:       *ygpu = *xgpu * alpha;
663:       ViennaCLWaitForGPU();
664:     } catch (std::exception const &ex) {
665:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
666:     }
667:     PetscLogGpuTimeEnd();
668:     PetscLogGpuFlops(xin->map->n);
669:     VecViennaCLRestoreArrayRead(xin, &xgpu);
670:     VecViennaCLRestoreArray(yin, &ygpu);
671:   } else if (xin->map->n > 0) {
672:     VecViennaCLGetArrayRead(xin, &xgpu);
673:     VecViennaCLGetArray(yin, &ygpu);
674:     PetscLogGpuTimeBegin();
675:     try {
676:       *ygpu = *xgpu * alpha + *ygpu * beta;
677:       ViennaCLWaitForGPU();
678:     } catch (std::exception const &ex) {
679:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
680:     }
681:     PetscLogGpuTimeEnd();
682:     VecViennaCLRestoreArrayRead(xin, &xgpu);
683:     VecViennaCLRestoreArray(yin, &ygpu);
684:     PetscLogGpuFlops(3.0 * xin->map->n);
685:   }
686:   return 0;
687: }

689: /* operation  z = alpha * x + beta *y + gamma *z*/
690: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
691: {
692:   PetscInt              n = zin->map->n;
693:   const ViennaCLVector *xgpu, *ygpu;
694:   ViennaCLVector       *zgpu;

696:   VecViennaCLGetArrayRead(xin, &xgpu);
697:   VecViennaCLGetArrayRead(yin, &ygpu);
698:   VecViennaCLGetArray(zin, &zgpu);
699:   if (alpha == 0.0 && xin->map->n > 0) {
700:     PetscLogGpuTimeBegin();
701:     try {
702:       if (beta == 0.0) {
703:         *zgpu = gamma * *zgpu;
704:         ViennaCLWaitForGPU();
705:         PetscLogGpuFlops(1.0 * n);
706:       } else if (gamma == 0.0) {
707:         *zgpu = beta * *ygpu;
708:         ViennaCLWaitForGPU();
709:         PetscLogGpuFlops(1.0 * n);
710:       } else {
711:         *zgpu = beta * *ygpu + gamma * *zgpu;
712:         ViennaCLWaitForGPU();
713:         PetscLogGpuFlops(3.0 * n);
714:       }
715:     } catch (std::exception const &ex) {
716:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
717:     }
718:     PetscLogGpuTimeEnd();
719:     PetscLogGpuFlops(3.0 * n);
720:   } else if (beta == 0.0 && xin->map->n > 0) {
721:     PetscLogGpuTimeBegin();
722:     try {
723:       if (gamma == 0.0) {
724:         *zgpu = alpha * *xgpu;
725:         ViennaCLWaitForGPU();
726:         PetscLogGpuFlops(1.0 * n);
727:       } else {
728:         *zgpu = alpha * *xgpu + gamma * *zgpu;
729:         ViennaCLWaitForGPU();
730:         PetscLogGpuFlops(3.0 * n);
731:       }
732:     } catch (std::exception const &ex) {
733:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
734:     }
735:     PetscLogGpuTimeEnd();
736:   } else if (gamma == 0.0 && xin->map->n > 0) {
737:     PetscLogGpuTimeBegin();
738:     try {
739:       *zgpu = alpha * *xgpu + beta * *ygpu;
740:       ViennaCLWaitForGPU();
741:     } catch (std::exception const &ex) {
742:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
743:     }
744:     PetscLogGpuTimeEnd();
745:     PetscLogGpuFlops(3.0 * n);
746:   } else if (xin->map->n > 0) {
747:     PetscLogGpuTimeBegin();
748:     try {
749:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
750:       if (gamma != 1.0) *zgpu *= gamma;
751:       *zgpu += alpha * *xgpu + beta * *ygpu;
752:       ViennaCLWaitForGPU();
753:     } catch (std::exception const &ex) {
754:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
755:     }
756:     PetscLogGpuTimeEnd();
757:     VecViennaCLRestoreArray(zin, &zgpu);
758:     VecViennaCLRestoreArrayRead(xin, &xgpu);
759:     VecViennaCLRestoreArrayRead(yin, &ygpu);
760:     PetscLogGpuFlops(5.0 * n);
761:   }
762:   return 0;
763: }

765: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
766: {
767:   PetscInt              n = win->map->n;
768:   const ViennaCLVector *xgpu, *ygpu;
769:   ViennaCLVector       *wgpu;

771:   if (xin->map->n > 0) {
772:     VecViennaCLGetArrayRead(xin, &xgpu);
773:     VecViennaCLGetArrayRead(yin, &ygpu);
774:     VecViennaCLGetArray(win, &wgpu);
775:     PetscLogGpuTimeBegin();
776:     try {
777:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
778:       ViennaCLWaitForGPU();
779:     } catch (std::exception const &ex) {
780:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
781:     }
782:     PetscLogGpuTimeEnd();
783:     VecViennaCLRestoreArrayRead(xin, &xgpu);
784:     VecViennaCLRestoreArrayRead(yin, &ygpu);
785:     VecViennaCLRestoreArray(win, &wgpu);
786:     PetscLogGpuFlops(n);
787:   }
788:   return 0;
789: }

791: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
792: {
793:   PetscInt              n = xin->map->n;
794:   PetscBLASInt          bn;
795:   const ViennaCLVector *xgpu;

797:   if (xin->map->n > 0) {
798:     PetscBLASIntCast(n, &bn);
799:     VecViennaCLGetArrayRead(xin, &xgpu);
800:     if (type == NORM_2 || type == NORM_FROBENIUS) {
801:       PetscLogGpuTimeBegin();
802:       try {
803:         *z = viennacl::linalg::norm_2(*xgpu);
804:         ViennaCLWaitForGPU();
805:       } catch (std::exception const &ex) {
806:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
807:       }
808:       PetscLogGpuTimeEnd();
809:       PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
810:     } else if (type == NORM_INFINITY) {
811:       PetscLogGpuTimeBegin();
812:       try {
813:         *z = viennacl::linalg::norm_inf(*xgpu);
814:         ViennaCLWaitForGPU();
815:       } catch (std::exception const &ex) {
816:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
817:       }
818:       PetscLogGpuTimeEnd();
819:     } else if (type == NORM_1) {
820:       PetscLogGpuTimeBegin();
821:       try {
822:         *z = viennacl::linalg::norm_1(*xgpu);
823:         ViennaCLWaitForGPU();
824:       } catch (std::exception const &ex) {
825:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
826:       }
827:       PetscLogGpuTimeEnd();
828:       PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
829:     } else if (type == NORM_1_AND_2) {
830:       PetscLogGpuTimeBegin();
831:       try {
832:         *z       = viennacl::linalg::norm_1(*xgpu);
833:         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
834:         ViennaCLWaitForGPU();
835:       } catch (std::exception const &ex) {
836:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837:       }
838:       PetscLogGpuTimeEnd();
839:       PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
840:       PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
841:     }
842:     VecViennaCLRestoreArrayRead(xin, &xgpu);
843:   } else if (type == NORM_1_AND_2) {
844:     *z       = 0.0;
845:     *(z + 1) = 0.0;
846:   } else *z = 0.0;
847:   return 0;
848: }

850: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
851: {
852:   VecSetRandom_SeqViennaCL_Private(xin, r);
853:   xin->offloadmask = PETSC_OFFLOAD_CPU;
854:   return 0;
855: }

857: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
858: {
860:   VecViennaCLCopyFromGPU(vin);
861:   VecResetArray_SeqViennaCL_Private(vin);
862:   vin->offloadmask = PETSC_OFFLOAD_CPU;
863:   return 0;
864: }

866: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
867: {
869:   VecViennaCLCopyFromGPU(vin);
870:   VecPlaceArray_Seq(vin, a);
871:   vin->offloadmask = PETSC_OFFLOAD_CPU;
872:   return 0;
873: }

875: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
876: {
878:   VecViennaCLCopyFromGPU(vin);
879:   VecReplaceArray_Seq(vin, a);
880:   vin->offloadmask = PETSC_OFFLOAD_CPU;
881:   return 0;
882: }

884: /*@C
885:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

887:    Collective

889:    Input Parameters:
890: +  comm - the communicator, should be PETSC_COMM_SELF
891: -  n - the vector length

893:    Output Parameter:
894: .  V - the vector

896:    Notes:
897:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
898:    same type as an existing vector.

900:    Level: intermediate

902: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
903: @*/
904: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
905: {
906:   VecCreate(comm, v);
907:   VecSetSizes(*v, n, n);
908:   VecSetType(*v, VECSEQVIENNACL);
909:   return 0;
910: }

912: /*@C
913:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
914:    where the user provides the array space to store the vector values.

916:    Collective

918:    Input Parameters:
919: +  comm - the communicator, should be PETSC_COMM_SELF
920: .  bs - the block size
921: .  n - the vector length
922: -  array - viennacl array where the vector elements are to be stored.

924:    Output Parameter:
925: .  V - the vector

927:    Notes:
928:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
929:    same type as an existing vector.

931:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
932:    at a later stage to SET the array for storing the vector values.

934:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
935:    The user should not free the array until the vector is destroyed.

937:    Level: intermediate

939: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
940:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
941:           `VecCreateMPIWithArray()`
942: @*/
943: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
944: {
945:   PetscMPIInt size;

947:   VecCreate(comm, V);
948:   VecSetSizes(*V, n, n);
949:   VecSetBlockSize(*V, bs);
950:   MPI_Comm_size(comm, &size);
952:   VecCreate_SeqViennaCL_Private(*V, array);
953:   return 0;
954: }

956: /*@C
957:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
958:    the user provides the array space to store the vector values.

960:    Collective

962:    Input Parameters:
963: +  comm - the communicator, should be PETSC_COMM_SELF
964: .  bs - the block size
965: .  n - the vector length
966: -  cpuarray - CPU memory where the vector elements are to be stored.
967: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

969:    Output Parameter:
970: .  V - the vector

972:    Notes:
973:    If both cpuarray and viennaclvec are provided, the caller must ensure that
974:    the provided arrays have identical values.

976:    PETSc does NOT free the provided arrays when the vector is destroyed via
977:    VecDestroy(). The user should not free the array until the vector is
978:    destroyed.

980:    Level: intermediate

982: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
983:           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
984:           `VecViennaCLAllocateCheckHost()`
985: @*/
986: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
987: {
988:   PetscMPIInt size;


991:   MPI_Comm_size(comm, &size);

994:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
995:   VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V);

997:   if (cpuarray && viennaclvec) {
998:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
999:     s->array          = (PetscScalar *)cpuarray;
1000:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1001:   } else if (cpuarray) {
1002:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1003:     s->array          = (PetscScalar *)cpuarray;
1004:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1005:   } else if (viennaclvec) {
1006:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1007:   } else {
1008:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1009:   }

1011:   return 0;
1012: }

1014: /*@C
1015:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1016:    the one provided by the user. This is useful to avoid a copy.

1018:    Not Collective

1020:    Input Parameters:
1021: +  vec - the vector
1022: -  array - the ViennaCL vector

1024:    Notes:
1025:    You can return to the original viennacl vector with a call to
1026:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1027:    and VecPlaceArray() at the same time on the same vector.

1029:    Level: intermediate

1031: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1032:           `VecCUDAPlaceArray()`,

1034: @*/
1035: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1036: {
1038:   VecViennaCLCopyToGPU(vin);
1040:   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1041:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1042:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1043:   PetscObjectStateIncrease((PetscObject)vin);
1044:   return 0;
1045: }

1047: /*@C
1048:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1049:    after the use of VecViennaCLPlaceArray().

1051:    Not Collective

1053:    Input Parameters:
1054: .  vec - the vector

1056:    Level: developer

1058: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1059: @*/
1060: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1061: {
1063:   VecViennaCLCopyToGPU(vin);
1064:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1065:   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1066:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1067:   PetscObjectStateIncrease((PetscObject)vin);
1068:   return 0;
1069: }

1071: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1072:  *
1073:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1074:  */
1075: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1076: {
1077:   VecDot_SeqViennaCL(s, t, dp);
1078:   VecNorm_SeqViennaCL(t, NORM_2, nm);
1079:   *nm *= *nm; //squared norm required
1080:   return 0;
1081: }

1083: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1084: {
1085:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V);
1086:   PetscLayoutReference(win->map, &(*V)->map);
1087:   PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
1088:   PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
1089:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1090:   return 0;
1091: }

1093: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1094: {
1095:   try {
1096:     if (v->spptr) {
1097:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1098:       delete (Vec_ViennaCL *)v->spptr;
1099:     }
1100:   } catch (char *ex) {
1101:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1102:   }
1103:   VecDestroy_SeqViennaCL_Private(v);
1104:   return 0;
1105: }

1107: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1108: {
1109:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1110:     VecViennaCLCopyFromGPU(v);
1111:   } else {
1112:     VecViennaCLAllocateCheckHost(v);
1113:   }
1114:   *a = *((PetscScalar **)v->data);
1115:   return 0;
1116: }

1118: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1119: {
1120:   v->offloadmask = PETSC_OFFLOAD_CPU;
1121:   return 0;
1122: }

1124: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1125: {
1126:   VecViennaCLAllocateCheckHost(v);
1127:   *a = *((PetscScalar **)v->data);
1128:   return 0;
1129: }

1131: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1132: {
1133:   V->boundtocpu = flg;
1134:   if (flg) {
1135:     VecViennaCLCopyFromGPU(V);
1136:     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1137:     V->ops->dot             = VecDot_Seq;
1138:     V->ops->norm            = VecNorm_Seq;
1139:     V->ops->tdot            = VecTDot_Seq;
1140:     V->ops->scale           = VecScale_Seq;
1141:     V->ops->copy            = VecCopy_Seq;
1142:     V->ops->set             = VecSet_Seq;
1143:     V->ops->swap            = VecSwap_Seq;
1144:     V->ops->axpy            = VecAXPY_Seq;
1145:     V->ops->axpby           = VecAXPBY_Seq;
1146:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1147:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1148:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1149:     V->ops->setrandom       = VecSetRandom_Seq;
1150:     V->ops->dot_local       = VecDot_Seq;
1151:     V->ops->tdot_local      = VecTDot_Seq;
1152:     V->ops->norm_local      = VecNorm_Seq;
1153:     V->ops->mdot_local      = VecMDot_Seq;
1154:     V->ops->mtdot_local     = VecMTDot_Seq;
1155:     V->ops->maxpy           = VecMAXPY_Seq;
1156:     V->ops->mdot            = VecMDot_Seq;
1157:     V->ops->mtdot           = VecMTDot_Seq;
1158:     V->ops->aypx            = VecAYPX_Seq;
1159:     V->ops->waxpy           = VecWAXPY_Seq;
1160:     V->ops->dotnorm2        = NULL;
1161:     V->ops->placearray      = VecPlaceArray_Seq;
1162:     V->ops->replacearray    = VecReplaceArray_Seq;
1163:     V->ops->resetarray      = VecResetArray_Seq;
1164:     V->ops->duplicate       = VecDuplicate_Seq;
1165:   } else {
1166:     V->ops->dot             = VecDot_SeqViennaCL;
1167:     V->ops->norm            = VecNorm_SeqViennaCL;
1168:     V->ops->tdot            = VecTDot_SeqViennaCL;
1169:     V->ops->scale           = VecScale_SeqViennaCL;
1170:     V->ops->copy            = VecCopy_SeqViennaCL;
1171:     V->ops->set             = VecSet_SeqViennaCL;
1172:     V->ops->swap            = VecSwap_SeqViennaCL;
1173:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1174:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1175:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1176:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1177:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1178:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1179:     V->ops->dot_local       = VecDot_SeqViennaCL;
1180:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1181:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1182:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1183:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1184:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1185:     V->ops->mdot            = VecMDot_SeqViennaCL;
1186:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1187:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1188:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1189:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1190:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1191:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1192:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1193:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1194:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1195:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1196:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1197:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1198:   }
1199:   return 0;
1200: }

1202: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1203: {
1204:   PetscMPIInt size;

1206:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1208:   VecCreate_Seq_Private(V, 0);
1209:   PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);

1211:   VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1212:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1214:   VecViennaCLAllocateCheck(V);
1215:   VecSet_SeqViennaCL(V, 0.0);
1216:   return 0;
1217: }

1219: /*@C
1220:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1222:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1223:   invoking clReleaseContext().

1225:   Input Parameters:
1226: .  v    - the vector

1228:   Output Parameters:
1229: .  ctx - pointer to the underlying CL context

1231:   Level: intermediate

1233: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1234: @*/
1235: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1236: {
1237: #if !defined(PETSC_HAVE_OPENCL)
1238:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1239: #else

1242:   const ViennaCLVector *v_vcl;
1243:   VecViennaCLGetArrayRead(v, &v_vcl);
1244:   try {
1245:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1246:     const cl_context ocl_ctx = vcl_ctx.handle().get();
1247:     clRetainContext(ocl_ctx);
1248:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1249:   } catch (std::exception const &ex) {
1250:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1251:   }

1253:   return 0;
1254: #endif
1255: }

1257: /*@C
1258:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1259:   operations of the Vec are enqueued.

1261:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1262:   responsible for invoking clReleaseCommandQueue().

1264:   Input Parameters:
1265: .  v    - the vector

1267:   Output Parameters:
1268: .  ctx - pointer to the CL command queue

1270:   Level: intermediate

1272: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1273: @*/
1274: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1275: {
1276: #if !defined(PETSC_HAVE_OPENCL)
1277:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1278: #else
1280:   const ViennaCLVector *v_vcl;
1281:   VecViennaCLGetArrayRead(v, &v_vcl);
1282:   try {
1283:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1284:     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1285:     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1286:     clRetainCommandQueue(ocl_queue);
1287:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1288:   } catch (std::exception const &ex) {
1289:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1290:   }

1292:   return 0;
1293: #endif
1294: }

1296: /*@C
1297:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1299:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1300:   invoking clReleaseMemObject().

1302:   Input Parameters:
1303: .  v    - the vector

1305:   Output Parameters:
1306: .  mem - pointer to the device buffer

1308:   Level: intermediate

1310: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1311: @*/
1312: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1313: {
1314: #if !defined(PETSC_HAVE_OPENCL)
1315:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1316: #else
1318:   const ViennaCLVector *v_vcl;
1319:   VecViennaCLGetArrayRead(v, &v_vcl);
1320:   try {
1321:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1322:     clRetainMemObject(ocl_mem);
1323:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1324:   } catch (std::exception const &ex) {
1325:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1326:   }
1327:   return 0;
1328: #endif
1329: }

1331: /*@C
1332:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1334:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1335:   invoking clReleaseMemObject().

1337:   The device pointer has to be released by calling
1338:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1339:   the host will be marked as out of date.  A subsequent access of the host data
1340:   will thus incur a data transfer from the device to the host.

1342:   Input Parameters:
1343: .  v    - the vector

1345:   Output Parameters:
1346: .  mem - pointer to the device buffer

1348:   Level: intermediate

1350: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1351: @*/
1352: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1353: {
1354: #if !defined(PETSC_HAVE_OPENCL)
1355:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1356: #else
1358:   ViennaCLVector *v_vcl;
1359:   VecViennaCLGetArrayWrite(v, &v_vcl);
1360:   try {
1361:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1362:     clRetainMemObject(ocl_mem);
1363:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1364:   } catch (std::exception const &ex) {
1365:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1366:   }

1368:   return 0;
1369: #endif
1370: }

1372: /*@C
1373:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1374:   acquired with VecViennaCLGetCLMemWrite().

1376:    This marks the host data as out of date.  Subsequent access to the
1377:    vector data on the host side with for instance VecGetArray() incurs a
1378:    data transfer.

1380:   Input Parameters:
1381: .  v    - the vector

1383:   Level: intermediate

1385: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1386: @*/
1387: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1388: {
1389: #if !defined(PETSC_HAVE_OPENCL)
1390:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1391: #else
1393:   VecViennaCLRestoreArrayWrite(v, PETSC_NULL);

1395:   return 0;
1396: #endif
1397: }

1399: /*@C
1400:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1402:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1403:   invoking clReleaseMemObject().

1405:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1406:   Upon restoring the vector data the data on the host will be marked as out of
1407:   date.  A subsequent access of the host data will thus incur a data transfer
1408:   from the device to the host.

1410:   Input Parameters:
1411: .  v    - the vector

1413:   Output Parameters:
1414: .  mem - pointer to the device buffer

1416:   Level: intermediate

1418: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1419: @*/
1420: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1421: {
1422: #if !defined(PETSC_HAVE_OPENCL)
1423:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1424: #else
1426:   ViennaCLVector *v_vcl;
1427:   VecViennaCLGetArray(v, &v_vcl);
1428:   try {
1429:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1430:     clRetainMemObject(ocl_mem);
1431:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1432:   } catch (std::exception const &ex) {
1433:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1434:   }

1436:   return 0;
1437: #endif
1438: }

1440: /*@C
1441:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1442:   acquired with VecViennaCLGetCLMem().

1444:    This marks the host data as out of date. Subsequent access to the vector
1445:    data on the host side with for instance VecGetArray() incurs a data
1446:    transfer.

1448:   Input Parameters:
1449: .  v    - the vector

1451:   Level: intermediate

1453: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1454: @*/
1455: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1456: {
1457: #if !defined(PETSC_HAVE_OPENCL)
1458:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1459: #else
1461:   VecViennaCLRestoreArray(v, PETSC_NULL);

1463:   return 0;
1464: #endif
1465: }

1467: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1468: {
1469:   Vec_ViennaCL *vecviennacl;
1470:   PetscMPIInt   size;

1472:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1474:   VecCreate_Seq_Private(V, 0);
1475:   PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1476:   VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1477:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1479:   if (array) {
1480:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1481:     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1482:     vecviennacl->GPUarray_allocated = 0;
1483:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1484:     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1485:   }

1487:   return 0;
1488: }