Actual source code: mpiviennacl.cxx


/*
   This file contains routines for parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
. -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`
M*/
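
/*
   A minimal usage sketch: create a vector and let the options database pick
   the viennacl type. The calls below are standard PETSc Vec API; error
   checking is omitted to match the style of this file, and the executable
   name ./app is only a placeholder.

     Vec v;
     VecCreate(PETSC_COMM_WORLD, &v);
     VecSetSizes(v, PETSC_DECIDE, 100);
     VecSetFromOptions(v);   // -vec_type viennacl selects VECVIENNACL here
     VecSet(v, 1.0);
     VecDestroy(&v);

   Run as: ./app -vec_type viennacl
*/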

PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL *)v->spptr;
    }
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }
  VecDestroy_MPI(v);
  return 0;
}

/* Global norms are assembled from per-process ViennaCL norms followed by a
   reduction, e.g. ||x||_2 = sqrt(sum over processes of ||x_local||_2^2) */
PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
{
  PetscReal sum, work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqViennaCL(xin, NORM_2, &work);
    work *= work;
    MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local sum */
    VecNorm_SeqViennaCL(xin, NORM_1, &work);
    /* Find the global sum */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work);
    /* Find the global max */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqViennaCL(xin, NORM_1, temp);
    VecNorm_SeqViennaCL(xin, NORM_2, temp + 1);
    temp[1] = temp[1] * temp[1];
    MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return 0;
}

PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecDot_SeqViennaCL(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecTDot_SeqViennaCL(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  if (nv > 128) PetscMalloc1(nv, &work);
  VecMDot_SeqViennaCL(xin, nv, y, work);
  MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  if (nv > 128) PetscFree(work);
  return 0;
}

/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
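
/*
   A minimal usage sketch: request the mpiviennacl type directly with
   VecSetType() instead of going through the options database. Standard PETSc
   Vec API; error checking omitted to match the style of this file.

     Vec v;
     VecCreate(PETSC_COMM_WORLD, &v);
     VecSetSizes(v, PETSC_DECIDE, 100);
     VecSetType(v, VECMPIVIENNACL);   // or VECVIENNACL to dispatch on communicator size
     VecSet(v, 1.0);
     VecDestroy(&v);
*/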

PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
{
  Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
  PetscScalar *array;

  VecCreate(PetscObjectComm((PetscObject)win), v);
  PetscLayoutReference(win->map, &(*v)->map);

  VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, NULL);
  vw = (Vec_MPI *)(*v)->data;
  PetscMemcpy((*v)->ops, win->ops, sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v, &array);
    VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep);
    PetscMemcpy(vw->localrep->ops, w->localrep->ops, sizeof(struct _VecOps));
    VecRestoreArray(*v, &array);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) PetscObjectReference((PetscObject)vw->localupdate);
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscObjectChangeTypeName((PetscObject)(*v), VECMPIVIENNACL);

  PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*v))->olist);
  PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*v))->qlist);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  return 0;
}

PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar work[2], sum[2];

  VecDotNorm2_SeqViennaCL(s, t, work, work + 1);
  MPIU_Allreduce(work, sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s));
  *dp = sum[0];
  *nm = sum[1];
  return 0;
}

PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
{
  vv->boundtocpu = bind;

  if (bind) {
    VecViennaCLCopyFromGPU(vv);
    vv->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2               = NULL;
    vv->ops->waxpy                  = VecWAXPY_Seq;
    vv->ops->dot                    = VecDot_MPI;
    vv->ops->mdot                   = VecMDot_MPI;
    vv->ops->tdot                   = VecTDot_MPI;
    vv->ops->norm                   = VecNorm_MPI;
    vv->ops->scale                  = VecScale_Seq;
    vv->ops->copy                   = VecCopy_Seq;
    vv->ops->set                    = VecSet_Seq;
    vv->ops->swap                   = VecSwap_Seq;
    vv->ops->axpy                   = VecAXPY_Seq;
    vv->ops->axpby                  = VecAXPBY_Seq;
    vv->ops->maxpy                  = VecMAXPY_Seq;
    vv->ops->aypx                   = VecAYPX_Seq;
    vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult          = VecPointwiseMult_Seq;
    vv->ops->setrandom              = VecSetRandom_Seq;
    vv->ops->placearray             = VecPlaceArray_Seq;
    vv->ops->replacearray           = VecReplaceArray_Seq;
    vv->ops->resetarray             = VecResetArray_Seq;
    vv->ops->dot_local              = VecDot_Seq;
    vv->ops->tdot_local             = VecTDot_Seq;
    vv->ops->norm_local             = VecNorm_Seq;
    vv->ops->mdot_local             = VecMDot_Seq;
    vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector         = NULL;
    vv->ops->restorelocalvector     = NULL;
    vv->ops->getlocalvectorread     = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite          = NULL;
  } else {
    vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
    vv->ops->dot             = VecDot_MPIViennaCL;
    vv->ops->mdot            = VecMDot_MPIViennaCL;
    vv->ops->tdot            = VecTDot_MPIViennaCL;
    vv->ops->norm            = VecNorm_MPIViennaCL;
    vv->ops->scale           = VecScale_SeqViennaCL;
    vv->ops->copy            = VecCopy_SeqViennaCL;
    vv->ops->set             = VecSet_SeqViennaCL;
    vv->ops->swap            = VecSwap_SeqViennaCL;
    vv->ops->axpy            = VecAXPY_SeqViennaCL;
    vv->ops->axpby           = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
    vv->ops->aypx            = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local       = VecDot_SeqViennaCL;
    vv->ops->tdot_local      = VecTDot_SeqViennaCL;
    vv->ops->norm_local      = VecNorm_SeqViennaCL;
    vv->ops->mdot_local      = VecMDot_SeqViennaCL;
    vv->ops->destroy         = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray      = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray        = VecGetArray_SeqViennaCL;
    vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
  }
  return 0;
}

PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscLayoutSetUp(vv->map);
  VecViennaCLAllocateCheck(vv);
  VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)(vv->spptr))->GPUarray);
  VecViennaCLAllocateCheckHost(vv);
  VecSet(vv, 0.0);
  VecSet_Seq(vv, 0.0);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  return 0;
}

PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v), &size);
  if (size == 1) {
    VecSetType(v, VECSEQVIENNACL);
  } else {
    VecSetType(v, VECMPIVIENNACL);
  }
  return 0;
}

/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user-provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`

@*/
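/*
   A minimal usage sketch, assuming ViennaCLVector is the
   viennacl::vector<PetscScalar> typedef used by this implementation and that
   nlocal is a per-process length chosen by the caller; error checking omitted.

     PetscInt        nlocal = 100;                        // cannot be PETSC_DECIDE
     ViennaCLVector *gpuvec = new ViennaCLVector(nlocal); // user-managed device storage
     Vec             v;
     VecCreateMPIViennaCLWithArray(PETSC_COMM_WORLD, 1, nlocal, PETSC_DECIDE, gpuvec, &v);
     // ... use v ...
     VecDestroy(&v);  // does NOT free gpuvec
     delete gpuvec;   // the caller releases the device storage afterwards
*/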
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv)
{
  PetscSplitOwnership(comm, &n, &N);
  VecCreate(comm, vv);
  VecSetSizes(*vv, n, N);
  VecSetBlockSize(*vv, bs);
  VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array);
  return 0;
}

/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
   where the user provides both the CPU array and the ViennaCL vector to store
   the vector values.

   Collective

   Input Parameters:
+  comm        - the MPI communicator to use
.  bs          - block size, same meaning as VecSetBlockSize()
.  n           - local vector length, cannot be PETSC_DECIDE
.  N           - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray    - the user-provided CPU array to store the vector values
-  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
          `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
          `VecViennaCLAllocateCheckHost()`
@*/
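/*
   A minimal usage sketch, assuming host and device buffers of local length
   nlocal that the caller has already filled with identical values, as the
   Notes above require; error checking omitted.

     PetscInt        nlocal = 100;
     PetscScalar    *host;
     PetscMalloc1(nlocal, &host);
     ViennaCLVector *dev = new ViennaCLVector(nlocal);
     // ... fill host and *dev with the same nlocal values ...
     Vec v;
     VecCreateMPIViennaCLWithArrays(PETSC_COMM_WORLD, 1, nlocal, PETSC_DECIDE, host, dev, &v);
     // ... use v ...
     VecDestroy(&v);  // neither host nor dev is freed here
     delete dev;
     PetscFree(host);
*/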
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv)
{
  VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv);
  if (cpuarray && viennaclvec) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  return 0;
}

PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  VecCreate_MPI_Private(vv, PETSC_FALSE, 0, NULL);
  PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL);

  VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE);
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    VecViennaCLAllocateCheck(vv);
    VecViennaCLAllocateCheckHost(vv);
    VecSet(vv, 0.0);
    VecSet_Seq(vv, 0.0);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
    vecviennacl                     = (Vec_ViennaCL *)vv->spptr;
    vecviennacl->GPUarray_allocated = nullptr;
    vecviennacl->GPUarray           = (ViennaCLVector *)array;
    vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
  }

  return 0;
}