Actual source code: bvec1.c

  1: /*
  2:    Defines the BLAS based vector operations. Code shared by parallel
  3:   and sequential vectors.
  4: */

  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petscblaslapack.h>

  9: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
 10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 11: #else
 12: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 13: #endif
 14: {
 15:   const PetscInt     n   = xin->map->n;
 16:   const PetscBLASInt one = 1;
 17:   const PetscScalar *ya, *xa;
 18:   PetscBLASInt       bn;

 20:   PetscFunctionBegin;
 21:   PetscCall(PetscBLASIntCast(n, &bn));
 22:   if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
 23:   PetscCall(VecGetArrayRead(xin, &xa));
 24:   PetscCall(VecGetArrayRead(yin, &ya));
 25:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
 26:      the second */
 27:   PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
 28:   PetscCall(VecRestoreArrayRead(xin, &xa));
 29:   PetscCall(VecRestoreArrayRead(yin, &ya));
 30:   PetscFunctionReturn(PETSC_SUCCESS);
 31: }

 33: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 34: {
 35:   PetscFunctionBegin;
 36:   PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }

 40: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 41: {
 42:   PetscFunctionBegin;
 43:   /*
 44:     pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
 45:     dot(&bn, xa, &one, ya, &one)
 46:   */
 47:   PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 52: {
 53:   PetscFunctionBegin;
 54:   if (alpha == (PetscScalar)0.0) {
 55:     PetscCall(VecSet_Seq(xin, alpha));
 56:   } else if (alpha != (PetscScalar)1.0) {
 57:     const PetscBLASInt one = 1;
 58:     PetscBLASInt       bn;
 59:     PetscScalar       *xarray;

 61:     PetscCall(PetscBLASIntCast(xin->map->n, &bn));
 62:     PetscCall(PetscLogFlops(bn));
 63:     PetscCall(VecGetArray(xin, &xarray));
 64:     PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
 65:     PetscCall(VecRestoreArray(xin, &xarray));
 66:   }
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
 71: {
 72:   PetscFunctionBegin;
 73:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 74:   if (alpha != (PetscScalar)0.0) {
 75:     const PetscScalar *xarray;
 76:     PetscScalar       *yarray;
 77:     const PetscBLASInt one = 1;
 78:     PetscBLASInt       bn;

 80:     PetscCall(PetscBLASIntCast(yin->map->n, &bn));
 81:     PetscCall(PetscLogFlops(2.0 * bn));
 82:     PetscCall(VecGetArrayRead(xin, &xarray));
 83:     PetscCall(VecGetArray(yin, &yarray));
 84:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
 85:     PetscCall(VecRestoreArrayRead(xin, &xarray));
 86:     PetscCall(VecRestoreArray(yin, &yarray));
 87:   }
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
 92: {
 93:   PetscFunctionBegin;
 94:   if (a == (PetscScalar)0.0) {
 95:     PetscCall(VecScale_Seq(yin, b));
 96:   } else if (b == (PetscScalar)1.0) {
 97:     PetscCall(VecAXPY_Seq(yin, a, xin));
 98:   } else if (a == (PetscScalar)1.0) {
 99:     PetscCall(VecAYPX_Seq(yin, b, xin));
100:   } else {
101:     const PetscInt     n = yin->map->n;
102:     const PetscScalar *xx;
103:     PetscInt           flops;
104:     PetscScalar       *yy;

106:     PetscCall(VecGetArrayRead(xin, &xx));
107:     PetscCall(VecGetArray(yin, &yy));
108:     if (b == (PetscScalar)0.0) {
109:       flops = n;
110:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
111:     } else {
112:       flops = 3 * n;
113:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
114:     }
115:     PetscCall(VecRestoreArrayRead(xin, &xx));
116:     PetscCall(VecRestoreArray(yin, &yy));
117:     PetscCall(PetscLogFlops(flops));
118:   }
119:   PetscFunctionReturn(PETSC_SUCCESS);
120: }

122: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
123: {
124:   const PetscInt     n = zin->map->n;
125:   const PetscScalar *yy, *xx;
126:   PetscInt           flops = 4 * n; // common case
127:   PetscScalar       *zz;

129:   PetscFunctionBegin;
130:   PetscCall(VecGetArrayRead(xin, &xx));
131:   PetscCall(VecGetArrayRead(yin, &yy));
132:   PetscCall(VecGetArray(zin, &zz));
133:   if (alpha == (PetscScalar)1.0) {
134:     for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
135:   } else if (gamma == (PetscScalar)1.0) {
136:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
137:   } else if (gamma == (PetscScalar)0.0) {
138:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
139:     flops -= n;
140:   } else {
141:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
142:     flops += n;
143:   }
144:   PetscCall(VecRestoreArrayRead(xin, &xx));
145:   PetscCall(VecRestoreArrayRead(yin, &yy));
146:   PetscCall(VecRestoreArray(zin, &zz));
147:   PetscCall(PetscLogFlops(flops));
148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }