Actual source code: bvec1.c
1: /*
2: Defines the BLAS based vector operations. Code shared by parallel
3: and sequential vectors.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petscblaslapack.h>
9: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
11: #else
12: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
13: #endif
14: {
15: const PetscInt n = xin->map->n;
16: const PetscBLASInt one = 1;
17: const PetscScalar *ya, *xa;
18: PetscBLASInt bn;
20: PetscFunctionBegin;
21: PetscCall(PetscBLASIntCast(n, &bn));
22: if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
23: PetscCall(VecGetArrayRead(xin, &xa));
24: PetscCall(VecGetArrayRead(yin, &ya));
25: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
26: the second */
27: PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
28: PetscCall(VecRestoreArrayRead(xin, &xa));
29: PetscCall(VecRestoreArrayRead(yin, &ya));
30: PetscFunctionReturn(PETSC_SUCCESS);
31: }
33: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
34: {
35: PetscFunctionBegin;
36: PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
40: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
41: {
42: PetscFunctionBegin;
43: /*
44: pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
45: dot(&bn, xa, &one, ya, &one)
46: */
47: PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
52: {
53: PetscFunctionBegin;
54: if (alpha == (PetscScalar)0.0) {
55: PetscCall(VecSet_Seq(xin, alpha));
56: } else if (alpha != (PetscScalar)1.0) {
57: const PetscBLASInt one = 1;
58: PetscBLASInt bn;
59: PetscScalar *xarray;
61: PetscCall(PetscBLASIntCast(xin->map->n, &bn));
62: PetscCall(PetscLogFlops(bn));
63: PetscCall(VecGetArray(xin, &xarray));
64: PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
65: PetscCall(VecRestoreArray(xin, &xarray));
66: }
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
71: {
72: PetscFunctionBegin;
73: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
74: if (alpha != (PetscScalar)0.0) {
75: const PetscScalar *xarray;
76: PetscScalar *yarray;
77: const PetscBLASInt one = 1;
78: PetscBLASInt bn;
80: PetscCall(PetscBLASIntCast(yin->map->n, &bn));
81: PetscCall(PetscLogFlops(2.0 * bn));
82: PetscCall(VecGetArrayRead(xin, &xarray));
83: PetscCall(VecGetArray(yin, &yarray));
84: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
85: PetscCall(VecRestoreArrayRead(xin, &xarray));
86: PetscCall(VecRestoreArray(yin, &yarray));
87: }
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
91: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
92: {
93: PetscFunctionBegin;
94: if (a == (PetscScalar)0.0) {
95: PetscCall(VecScale_Seq(yin, b));
96: } else if (b == (PetscScalar)1.0) {
97: PetscCall(VecAXPY_Seq(yin, a, xin));
98: } else if (a == (PetscScalar)1.0) {
99: PetscCall(VecAYPX_Seq(yin, b, xin));
100: } else {
101: const PetscInt n = yin->map->n;
102: const PetscScalar *xx;
103: PetscScalar *yy;
105: PetscCall(VecGetArrayRead(xin, &xx));
106: PetscCall(VecGetArray(yin, &yy));
107: if (b == (PetscScalar)0.0) {
108: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
109: PetscCall(PetscLogFlops(n));
110: } else {
111: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
112: PetscCall(PetscLogFlops(3.0 * n));
113: }
114: PetscCall(VecRestoreArrayRead(xin, &xx));
115: PetscCall(VecRestoreArray(yin, &yy));
116: }
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
121: {
122: const PetscInt n = zin->map->n;
123: const PetscScalar *yy, *xx;
124: PetscInt flops = 4 * n; // common case
125: PetscScalar *zz;
127: PetscFunctionBegin;
128: PetscCall(VecGetArrayRead(xin, &xx));
129: PetscCall(VecGetArrayRead(yin, &yy));
130: PetscCall(VecGetArray(zin, &zz));
131: if (alpha == (PetscScalar)1.0) {
132: for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
133: } else if (gamma == (PetscScalar)1.0) {
134: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
135: } else if (gamma == (PetscScalar)0.0) {
136: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
137: flops -= n;
138: } else {
139: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
140: flops += n;
141: }
142: PetscCall(VecRestoreArrayRead(xin, &xx));
143: PetscCall(VecRestoreArrayRead(yin, &yy));
144: PetscCall(VecRestoreArray(zin, &zz));
145: PetscCall(PetscLogFlops(flops));
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }