Actual source code: bvec1.c


  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <petscblaslapack.h>

 10: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 11: {
 12:   const PetscScalar *ya, *xa;
 13:   PetscBLASInt       one = 1, bn = 0;

 15:   PetscBLASIntCast(xin->map->n, &bn);
 16:   VecGetArrayRead(xin, &xa);
 17:   VecGetArrayRead(yin, &ya);
 18:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
 19:   PetscCallBLAS("BLASdot", *z = BLASdot_(&bn, ya, &one, xa, &one));
 20:   VecRestoreArrayRead(xin, &xa);
 21:   VecRestoreArrayRead(yin, &ya);
 22:   if (xin->map->n > 0) PetscLogFlops(2.0 * xin->map->n - 1);
 23:   return 0;
 24: }

 26: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 27: {
 28:   const PetscScalar *ya, *xa;
 29:   PetscBLASInt       one = 1, bn = 0;

 31:   PetscBLASIntCast(xin->map->n, &bn);
 32:   VecGetArrayRead(xin, &xa);
 33:   VecGetArrayRead(yin, &ya);
 34:   PetscCallBLAS("BLASdot", *z = BLASdotu_(&bn, xa, &one, ya, &one));
 35:   VecRestoreArrayRead(xin, &xa);
 36:   VecRestoreArrayRead(yin, &ya);
 37:   if (xin->map->n > 0) PetscLogFlops(2.0 * xin->map->n - 1);
 38:   return 0;
 39: }

 41: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 42: {
 43:   PetscBLASInt one = 1, bn;

 45:   PetscBLASIntCast(xin->map->n, &bn);
 46:   if (alpha == (PetscScalar)0.0) {
 47:     VecSet_Seq(xin, alpha);
 48:   } else if (alpha != (PetscScalar)1.0) {
 49:     PetscScalar a = alpha, *xarray;
 50:     VecGetArray(xin, &xarray);
 51:     PetscCallBLAS("BLASscal", BLASscal_(&bn, &a, xarray, &one));
 52:     VecRestoreArray(xin, &xarray);
 53:   }
 54:   PetscLogFlops(xin->map->n);
 55:   return 0;
 56: }

 58: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
 59: {
 60:   const PetscScalar *xarray;
 61:   PetscScalar       *yarray;
 62:   PetscBLASInt       one = 1, bn;

 64:   PetscBLASIntCast(yin->map->n, &bn);
 65:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 66:   if (alpha != (PetscScalar)0.0) {
 67:     VecGetArrayRead(xin, &xarray);
 68:     VecGetArray(yin, &yarray);
 69:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
 70:     VecRestoreArrayRead(xin, &xarray);
 71:     VecRestoreArray(yin, &yarray);
 72:     PetscLogFlops(2.0 * yin->map->n);
 73:   }
 74:   return 0;
 75: }

 77: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
 78: {
 79:   PetscInt           n = yin->map->n, i;
 80:   const PetscScalar *xx;
 81:   PetscScalar       *yy;

 83:   if (a == (PetscScalar)0.0) {
 84:     VecScale_Seq(yin, b);
 85:   } else if (b == (PetscScalar)1.0) {
 86:     VecAXPY_Seq(yin, a, xin);
 87:   } else if (a == (PetscScalar)1.0) {
 88:     VecAYPX_Seq(yin, b, xin);
 89:   } else if (b == (PetscScalar)0.0) {
 90:     VecGetArrayRead(xin, &xx);
 91:     VecGetArray(yin, (PetscScalar **)&yy);
 92:     for (i = 0; i < n; i++) yy[i] = a * xx[i];
 93:     VecRestoreArrayRead(xin, &xx);
 94:     VecRestoreArray(yin, (PetscScalar **)&yy);
 95:     PetscLogFlops(xin->map->n);
 96:   } else {
 97:     VecGetArrayRead(xin, &xx);
 98:     VecGetArray(yin, (PetscScalar **)&yy);
 99:     for (i = 0; i < n; i++) yy[i] = a * xx[i] + b * yy[i];
100:     VecRestoreArrayRead(xin, &xx);
101:     VecRestoreArray(yin, (PetscScalar **)&yy);
102:     PetscLogFlops(3.0 * xin->map->n);
103:   }
104:   return 0;
105: }

107: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
108: {
109:   PetscInt           n = zin->map->n, i;
110:   const PetscScalar *yy, *xx;
111:   PetscScalar       *zz;

113:   VecGetArrayRead(xin, &xx);
114:   VecGetArrayRead(yin, &yy);
115:   VecGetArray(zin, &zz);
116:   if (alpha == (PetscScalar)1.0) {
117:     for (i = 0; i < n; i++) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
118:     PetscLogFlops(4.0 * n);
119:   } else if (gamma == (PetscScalar)1.0) {
120:     for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
121:     PetscLogFlops(4.0 * n);
122:   } else if (gamma == (PetscScalar)0.0) {
123:     for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i];
124:     PetscLogFlops(3.0 * n);
125:   } else {
126:     for (i = 0; i < n; i++) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
127:     PetscLogFlops(5.0 * n);
128:   }
129:   VecRestoreArrayRead(xin, &xx);
130:   VecRestoreArrayRead(yin, &yy);
131:   VecRestoreArray(zin, &zz);
132:   return 0;
133: }