Actual source code: bvec1.c
1: /*
2: Defines the BLAS based vector operations. Code shared by parallel
3: and sequential vectors.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petscblaslapack.h>
9: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
11: #else
12: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
13: #endif
14: {
15: const PetscInt n = xin->map->n;
16: const PetscBLASInt one = 1;
17: const PetscScalar *ya, *xa;
18: PetscBLASInt bn;
20: PetscFunctionBegin;
21: PetscCall(PetscBLASIntCast(n, &bn));
22: if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
23: PetscCall(VecGetArrayRead(xin, &xa));
24: PetscCall(VecGetArrayRead(yin, &ya));
25: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
26: the second */
27: PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
28: PetscCall(VecRestoreArrayRead(xin, &xa));
29: PetscCall(VecRestoreArrayRead(yin, &ya));
30: PetscFunctionReturn(PETSC_SUCCESS);
31: }
33: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
34: {
35: PetscFunctionBegin;
36: PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
40: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
41: {
42: PetscFunctionBegin;
43: /*
44: pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
45: dot(&bn, xa, &one, ya, &one)
46: */
47: PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
52: {
53: PetscFunctionBegin;
54: if (alpha == (PetscScalar)0.0) {
55: PetscCall(VecSet_Seq(xin, alpha));
56: } else if (alpha != (PetscScalar)1.0) {
57: const PetscBLASInt one = 1;
58: PetscBLASInt bn;
59: PetscScalar *xarray;
61: PetscCall(PetscBLASIntCast(xin->map->n, &bn));
62: PetscCall(PetscLogFlops(bn));
63: PetscCall(VecGetArray(xin, &xarray));
64: PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
65: PetscCall(VecRestoreArray(xin, &xarray));
66: }
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
71: {
72: PetscFunctionBegin;
73: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
74: if (alpha != (PetscScalar)0.0) {
75: const PetscScalar *xarray;
76: PetscScalar *yarray;
77: const PetscBLASInt one = 1;
78: PetscBLASInt bn;
80: PetscCall(PetscBLASIntCast(yin->map->n, &bn));
81: PetscCall(PetscLogFlops(2.0 * bn));
82: PetscCall(VecGetArrayRead(xin, &xarray));
83: PetscCall(VecGetArray(yin, &yarray));
84: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
85: PetscCall(VecRestoreArrayRead(xin, &xarray));
86: PetscCall(VecRestoreArray(yin, &yarray));
87: }
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
91: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
92: {
93: PetscFunctionBegin;
94: if (a == (PetscScalar)0.0) {
95: PetscCall(VecScale_Seq(yin, b));
96: } else if (b == (PetscScalar)1.0) {
97: PetscCall(VecAXPY_Seq(yin, a, xin));
98: } else if (a == (PetscScalar)1.0) {
99: PetscCall(VecAYPX_Seq(yin, b, xin));
100: } else {
101: const PetscInt n = yin->map->n;
102: const PetscScalar *xx;
103: PetscInt flops;
104: PetscScalar *yy;
106: PetscCall(VecGetArrayRead(xin, &xx));
107: PetscCall(VecGetArray(yin, &yy));
108: if (b == (PetscScalar)0.0) {
109: flops = n;
110: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
111: } else {
112: flops = 3 * n;
113: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
114: }
115: PetscCall(VecRestoreArrayRead(xin, &xx));
116: PetscCall(VecRestoreArray(yin, &yy));
117: PetscCall(PetscLogFlops(flops));
118: }
119: PetscFunctionReturn(PETSC_SUCCESS);
120: }
122: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
123: {
124: const PetscInt n = zin->map->n;
125: const PetscScalar *yy, *xx;
126: PetscInt flops = 4 * n; // common case
127: PetscScalar *zz;
129: PetscFunctionBegin;
130: PetscCall(VecGetArrayRead(xin, &xx));
131: PetscCall(VecGetArrayRead(yin, &yy));
132: PetscCall(VecGetArray(zin, &zz));
133: if (alpha == (PetscScalar)1.0) {
134: for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
135: } else if (gamma == (PetscScalar)1.0) {
136: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
137: } else if (gamma == (PetscScalar)0.0) {
138: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
139: flops -= n;
140: } else {
141: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
142: flops += n;
143: }
144: PetscCall(VecRestoreArrayRead(xin, &xx));
145: PetscCall(VecRestoreArrayRead(yin, &yy));
146: PetscCall(VecRestoreArray(zin, &zz));
147: PetscCall(PetscLogFlops(flops));
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }