Actual source code: mpiviennacl.cxx

  1: /*
  2:    This file contains routines for Parallel vector operations.
  3:  */
  4: #include <petscconf.h>
  5: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  6: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

  8: /*MC
  9:    VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

 11:    Options Database Keys:
 12: . -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

 14:   Level: beginner

 16: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecCreateMPI()`
 17: M*/

 19: static PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
 20: {
 21:   PetscFunctionBegin;
 22:   try {
 23:     if (v->spptr) {
 24:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
 25:       delete (Vec_ViennaCL *)v->spptr;
 26:     }
 27:   } catch (std::exception const &ex) {
 28:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
 29:   }
 30:   PetscCall(VecDestroy_MPI(v));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: static PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
 35: {
 36:   PetscReal sum, work = 0.0;

 38:   PetscFunctionBegin;
 39:   if (type == NORM_2 || type == NORM_FROBENIUS) {
 40:     PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, &work));
 41:     work *= work;
 42:     PetscCall(MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 43:     *z = PetscSqrtReal(sum);
 44:   } else if (type == NORM_1) {
 45:     /* Find the local part */
 46:     PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, &work));
 47:     /* Find the global max */
 48:     PetscCall(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 49:   } else if (type == NORM_INFINITY) {
 50:     /* Find the local max */
 51:     PetscCall(VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work));
 52:     /* Find the global max */
 53:     PetscCall(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin)));
 54:   } else if (type == NORM_1_AND_2) {
 55:     PetscReal temp[2];
 56:     PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, temp));
 57:     PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, temp + 1));
 58:     temp[1] = temp[1] * temp[1];
 59:     PetscCall(MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 60:     z[1] = PetscSqrtReal(z[1]);
 61:   }
 62:   PetscFunctionReturn(PETSC_SUCCESS);
 63: }

 65: static PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
 66: {
 67:   PetscScalar sum, work;

 69:   PetscFunctionBegin;
 70:   PetscCall(VecDot_SeqViennaCL(xin, yin, &work));
 71:   PetscCall(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 72:   *z = sum;
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: static PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
 77: {
 78:   PetscScalar sum, work;

 80:   PetscFunctionBegin;
 81:   PetscCall(VecTDot_SeqViennaCL(xin, yin, &work));
 82:   PetscCall(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 83:   *z = sum;
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: static PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 88: {
 89:   PetscScalar awork[128], *work = awork;

 91:   PetscFunctionBegin;
 92:   if (nv > 128) PetscCall(PetscMalloc1(nv, &work));
 93:   PetscCall(VecMDot_SeqViennaCL(xin, nv, y, work));
 94:   PetscCall(MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
 95:   if (nv > 128) PetscCall(PetscFree(work));
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: /*MC
100:    VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

102:    Options Database Keys:
103: . -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

105:   Level: beginner

107: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateMPI()`
108: M*/

110: static PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
111: {
112:   Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
113:   PetscScalar *array;

115:   PetscFunctionBegin;
116:   PetscCall(VecCreate(PetscObjectComm((PetscObject)win), v));
117:   PetscCall(PetscLayoutReference(win->map, &(*v)->map));

119:   PetscCall(VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, 0));
120:   vw           = (Vec_MPI *)(*v)->data;
121:   (*v)->ops[0] = win->ops[0];

123:   /* save local representation of the parallel vector (and scatter) if it exists */
124:   if (w->localrep) {
125:     PetscCall(VecGetArray(*v, &array));
126:     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep));
127:     vw->localrep->ops[0] = w->localrep->ops[0];
128:     PetscCall(VecRestoreArray(*v, &array));
129:     vw->localupdate = w->localupdate;
130:     if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
131:   }

133:   /* New vector should inherit stashing property of parent */
134:   (*v)->stash.donotstash   = win->stash.donotstash;
135:   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

137:   /* change type_name appropriately */
138:   PetscCall(PetscObjectChangeTypeName((PetscObject)*v, VECMPIVIENNACL));

140:   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
141:   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
142:   (*v)->map->bs   = PetscAbs(win->map->bs);
143:   (*v)->bstash.bs = win->bstash.bs;
144:   PetscFunctionReturn(PETSC_SUCCESS);
145: }

147: static PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
148: {
149:   PetscScalar work[2], sum[2];

151:   PetscFunctionBegin;
152:   PetscCall(VecDotNorm2_SeqViennaCL(s, t, work, work + 1));
153:   PetscCall(MPIU_Allreduce((void *)&work, (void *)&sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s)));
154:   *dp = sum[0];
155:   *nm = sum[1];
156:   PetscFunctionReturn(PETSC_SUCCESS);
157: }

159: static PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
160: {
161:   PetscFunctionBegin;
162:   vv->boundtocpu = bind;

164:   if (bind) {
165:     PetscCall(VecViennaCLCopyFromGPU(vv));
166:     vv->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
167:     vv->ops->dotnorm2               = NULL;
168:     vv->ops->waxpy                  = VecWAXPY_Seq;
169:     vv->ops->dot                    = VecDot_MPI;
170:     vv->ops->mdot                   = VecMDot_MPI;
171:     vv->ops->tdot                   = VecTDot_MPI;
172:     vv->ops->norm                   = VecNorm_MPI;
173:     vv->ops->scale                  = VecScale_Seq;
174:     vv->ops->copy                   = VecCopy_Seq;
175:     vv->ops->set                    = VecSet_Seq;
176:     vv->ops->swap                   = VecSwap_Seq;
177:     vv->ops->axpy                   = VecAXPY_Seq;
178:     vv->ops->axpby                  = VecAXPBY_Seq;
179:     vv->ops->maxpy                  = VecMAXPY_Seq;
180:     vv->ops->aypx                   = VecAYPX_Seq;
181:     vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
182:     vv->ops->pointwisemult          = VecPointwiseMult_Seq;
183:     vv->ops->setrandom              = VecSetRandom_Seq;
184:     vv->ops->placearray             = VecPlaceArray_Seq;
185:     vv->ops->replacearray           = VecReplaceArray_Seq;
186:     vv->ops->resetarray             = VecResetArray_Seq;
187:     vv->ops->dot_local              = VecDot_Seq;
188:     vv->ops->tdot_local             = VecTDot_Seq;
189:     vv->ops->norm_local             = VecNorm_Seq;
190:     vv->ops->mdot_local             = VecMDot_Seq;
191:     vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
192:     vv->ops->getlocalvector         = NULL;
193:     vv->ops->restorelocalvector     = NULL;
194:     vv->ops->getlocalvectorread     = NULL;
195:     vv->ops->restorelocalvectorread = NULL;
196:     vv->ops->getarraywrite          = NULL;
197:   } else {
198:     vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
199:     vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
200:     vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
201:     vv->ops->dot             = VecDot_MPIViennaCL;
202:     vv->ops->mdot            = VecMDot_MPIViennaCL;
203:     vv->ops->tdot            = VecTDot_MPIViennaCL;
204:     vv->ops->norm            = VecNorm_MPIViennaCL;
205:     vv->ops->scale           = VecScale_SeqViennaCL;
206:     vv->ops->copy            = VecCopy_SeqViennaCL;
207:     vv->ops->set             = VecSet_SeqViennaCL;
208:     vv->ops->swap            = VecSwap_SeqViennaCL;
209:     vv->ops->axpy            = VecAXPY_SeqViennaCL;
210:     vv->ops->axpby           = VecAXPBY_SeqViennaCL;
211:     vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
212:     vv->ops->aypx            = VecAYPX_SeqViennaCL;
213:     vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
214:     vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
215:     vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
216:     vv->ops->dot_local       = VecDot_SeqViennaCL;
217:     vv->ops->tdot_local      = VecTDot_SeqViennaCL;
218:     vv->ops->norm_local      = VecNorm_SeqViennaCL;
219:     vv->ops->mdot_local      = VecMDot_SeqViennaCL;
220:     vv->ops->destroy         = VecDestroy_MPIViennaCL;
221:     vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
222:     vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
223:     vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
224:     vv->ops->resetarray      = VecResetArray_SeqViennaCL;
225:     vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
226:     vv->ops->getarray        = VecGetArray_SeqViennaCL;
227:     vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
228:   }
229:   vv->ops->duplicatevecs = VecDuplicateVecs_Default;
230:   PetscFunctionReturn(PETSC_SUCCESS);
231: }

233: PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
234: {
235:   PetscFunctionBegin;
236:   PetscCall(PetscLayoutSetUp(vv->map));
237:   PetscCall(VecViennaCLAllocateCheck(vv));
238:   PetscCall(VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)vv->spptr)->GPUarray));
239:   PetscCall(VecViennaCLAllocateCheckHost(vv));
240:   PetscCall(VecSet(vv, 0.0));
241:   PetscCall(VecSet_Seq(vv, 0.0));
242:   vv->offloadmask = PETSC_OFFLOAD_BOTH;
243:   PetscFunctionReturn(PETSC_SUCCESS);
244: }

246: PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
247: {
248:   PetscMPIInt size;

250:   PetscFunctionBegin;
251:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
252:   if (size == 1) {
253:     PetscCall(VecSetType(v, VECSEQVIENNACL));
254:   } else {
255:     PetscCall(VecSetType(v, VECMPIVIENNACL));
256:   }
257:   PetscFunctionReturn(PETSC_SUCCESS);
258: }

260: /*@C
261:   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
262:   where the user provides the viennacl vector to store the vector values.

264:   Collective

266:   Input Parameters:
267: + comm  - the MPI communicator to use
268: . bs    - block size, same meaning as VecSetBlockSize()
269: . n     - local vector length, cannot be PETSC_DECIDE
270: . N     - global vector length (or PETSC_DECIDE to have calculated)
271: - array - the user provided GPU array to store the vector values

273:   Output Parameter:
274: . vv - the vector

276:   Notes:
277:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
278:   same type as an existing vector.

280:   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
281:   at a later stage to SET the array for storing the vector values.

283:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
284:   The user should not free the array until the vector is destroyed.

286:   Level: intermediate

288: .seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
289:           `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`

291: @*/
292: PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv)
293: {
294:   PetscFunctionBegin;
295:   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
296:   PetscCall(PetscSplitOwnership(comm, &n, &N));
297:   PetscCall(VecCreate(comm, vv));
298:   PetscCall(VecSetSizes(*vv, n, N));
299:   PetscCall(VecSetBlockSize(*vv, bs));
300:   PetscCall(VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array));
301:   PetscFunctionReturn(PETSC_SUCCESS);
302: }

304: /*@C
305:   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
306:   where the user provides the ViennaCL vector to store the vector values.

308:   Collective

310:   Input Parameters:
311: + comm        - the MPI communicator to use
312: . bs          - block size, same meaning as VecSetBlockSize()
313: . n           - local vector length, cannot be PETSC_DECIDE
314: . N           - global vector length (or PETSC_DECIDE to have calculated)
315: . cpuarray    - the user provided CPU array to store the vector values
316: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

318:   Output Parameter:
319: . vv - the vector

321:   Notes:
322:   If both cpuarray and viennaclvec are provided, the caller must ensure that
323:   the provided arrays have identical values.

325:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
326:   same type as an existing vector.

328:   PETSc does NOT free the provided arrays when the vector is destroyed via
329:   VecDestroy(). The user should not free the array until the vector is
330:   destroyed.

332:   Level: intermediate

334: .seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`
335:           `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
336:           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
337:           `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
338:           `VecViennaCLAllocateCheckHost()`
339: @*/
340: PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv)
341: {
342:   PetscFunctionBegin;
343:   PetscCall(VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv));
344:   if (cpuarray && viennaclvec) {
345:     Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
346:     s->array           = (PetscScalar *)cpuarray;
347:     (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
348:   } else if (cpuarray) {
349:     Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
350:     s->array           = (PetscScalar *)cpuarray;
351:     (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
352:   } else if (viennaclvec) {
353:     (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
354:   } else {
355:     (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
356:   }
357:   PetscFunctionReturn(PETSC_SUCCESS);
358: }

360: PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
361: {
362:   Vec_ViennaCL *vecviennacl;

364:   PetscFunctionBegin;
365:   PetscCall(VecCreate_MPI_Private(vv, PETSC_FALSE, 0, 0));
366:   PetscCall(PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL));

368:   PetscCall(VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE));
369:   vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

371:   if (alloc && !array) {
372:     PetscCall(VecViennaCLAllocateCheck(vv));
373:     PetscCall(VecViennaCLAllocateCheckHost(vv));
374:     PetscCall(VecSet(vv, 0.0));
375:     PetscCall(VecSet_Seq(vv, 0.0));
376:     vv->offloadmask = PETSC_OFFLOAD_BOTH;
377:   }
378:   if (array) {
379:     if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
380:     vecviennacl                     = (Vec_ViennaCL *)vv->spptr;
381:     vecviennacl->GPUarray_allocated = 0;
382:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
383:     vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
384:   }
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }