Actual source code: bvec1.c

  1: /*
  2:    Defines the BLAS based vector operations. Code shared by parallel
  3:   and sequential vectors.
  4: */

  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petscblaslapack.h>

  9: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
 10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 11: #else
 12: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 13: #endif
 14: {
 15:   const PetscInt     n   = xin->map->n;
 16:   const PetscBLASInt one = 1;
 17:   const PetscScalar *ya, *xa;
 18:   PetscBLASInt       bn;

 20:   PetscFunctionBegin;
 21:   PetscCall(PetscBLASIntCast(n, &bn));
 22:   if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
 23:   PetscCall(VecGetArrayRead(xin, &xa));
 24:   PetscCall(VecGetArrayRead(yin, &ya));
 25:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
 26:      the second */
 27:   PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
 28:   PetscCall(VecRestoreArrayRead(xin, &xa));
 29:   PetscCall(VecRestoreArrayRead(yin, &ya));
 30:   PetscFunctionReturn(PETSC_SUCCESS);
 31: }

 33: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 34: {
 35:   PetscFunctionBegin;
 36:   PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }

 40: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 41: {
 42:   PetscFunctionBegin;
 43:   /*
 44:     pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
 45:     dot(&bn, xa, &one, ya, &one)
 46:   */
 47:   PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 52: {
 53:   PetscFunctionBegin;
 54:   if (alpha == (PetscScalar)0.0) {
 55:     PetscCall(VecSet_Seq(xin, alpha));
 56:   } else if (alpha != (PetscScalar)1.0) {
 57:     const PetscBLASInt one = 1;
 58:     PetscBLASInt       bn;
 59:     PetscScalar       *xarray;

 61:     PetscCall(PetscBLASIntCast(xin->map->n, &bn));
 62:     PetscCall(PetscLogFlops(bn));
 63:     PetscCall(VecGetArray(xin, &xarray));
 64:     PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
 65:     PetscCall(VecRestoreArray(xin, &xarray));
 66:   }
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
 71: {
 72:   PetscFunctionBegin;
 73:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 74:   if (alpha != (PetscScalar)0.0) {
 75:     const PetscScalar *xarray;
 76:     PetscScalar       *yarray;
 77:     const PetscBLASInt one = 1;
 78:     PetscBLASInt       bn;

 80:     PetscCall(PetscBLASIntCast(yin->map->n, &bn));
 81:     PetscCall(PetscLogFlops(2.0 * bn));
 82:     PetscCall(VecGetArrayRead(xin, &xarray));
 83:     PetscCall(VecGetArray(yin, &yarray));
 84:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
 85:     PetscCall(VecRestoreArrayRead(xin, &xarray));
 86:     PetscCall(VecRestoreArray(yin, &yarray));
 87:   }
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
 92: {
 93:   PetscFunctionBegin;
 94:   if (a == (PetscScalar)0.0) {
 95:     PetscCall(VecScale_Seq(yin, b));
 96:   } else if (b == (PetscScalar)1.0) {
 97:     PetscCall(VecAXPY_Seq(yin, a, xin));
 98:   } else if (a == (PetscScalar)1.0) {
 99:     PetscCall(VecAYPX_Seq(yin, b, xin));
100:   } else {
101:     const PetscInt     n = yin->map->n;
102:     const PetscScalar *xx;
103:     PetscScalar       *yy;

105:     PetscCall(VecGetArrayRead(xin, &xx));
106:     PetscCall(VecGetArray(yin, &yy));
107:     if (b == (PetscScalar)0.0) {
108:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
109:       PetscCall(PetscLogFlops(n));
110:     } else {
111:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
112:       PetscCall(PetscLogFlops(3.0 * n));
113:     }
114:     PetscCall(VecRestoreArrayRead(xin, &xx));
115:     PetscCall(VecRestoreArray(yin, &yy));
116:   }
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }

120: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
121: {
122:   const PetscInt     n = zin->map->n;
123:   const PetscScalar *yy, *xx;
124:   PetscInt           flops = 4 * n; // common case
125:   PetscScalar       *zz;

127:   PetscFunctionBegin;
128:   PetscCall(VecGetArrayRead(xin, &xx));
129:   PetscCall(VecGetArrayRead(yin, &yy));
130:   PetscCall(VecGetArray(zin, &zz));
131:   if (alpha == (PetscScalar)1.0) {
132:     for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
133:   } else if (gamma == (PetscScalar)1.0) {
134:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
135:   } else if (gamma == (PetscScalar)0.0) {
136:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
137:     flops -= n;
138:   } else {
139:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
140:     flops += n;
141:   }
142:   PetscCall(VecRestoreArrayRead(xin, &xx));
143:   PetscCall(VecRestoreArrayRead(yin, &yy));
144:   PetscCall(VecRestoreArray(zin, &zz));
145:   PetscCall(PetscLogFlops(flops));
146:   PetscFunctionReturn(PETSC_SUCCESS);
147: }