Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
11: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
12: {
13: const PetscInt n = xin->map->n;
14: PetscInt i = nv, nv_rem = nv & 0x3;
15: PetscScalar sum0 = 0., sum1 = 0., sum2 = 0., sum3;
16: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
17: Vec *yy = (Vec *)yin;
19: PetscFunctionBegin;
20: PetscCall(VecGetArrayRead(xin, &x));
21: switch (nv_rem) {
22: case 3:
23: PetscCall(VecGetArrayRead(yy[0], &yy0));
24: PetscCall(VecGetArrayRead(yy[1], &yy1));
25: PetscCall(VecGetArrayRead(yy[2], &yy2));
26: fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
27: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
28: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
29: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
30: z[0] = sum0;
31: z[1] = sum1;
32: z[2] = sum2;
33: break;
34: case 2:
35: PetscCall(VecGetArrayRead(yy[0], &yy0));
36: PetscCall(VecGetArrayRead(yy[1], &yy1));
37: fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
38: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
39: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
40: z[0] = sum0;
41: z[1] = sum1;
42: break;
43: case 1:
44: PetscCall(VecGetArrayRead(yy[0], &yy0));
45: fortranmdot1_(x, yy0, &n, &sum0);
46: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
47: z[0] = sum0;
48: break;
49: case 0:
50: break;
51: }
52: z += nv_rem;
53: i -= nv_rem;
54: yy += nv_rem;
56: while (i > 0) {
57: sum0 = 0.;
58: sum1 = 0.;
59: sum2 = 0.;
60: sum3 = 0.;
61: PetscCall(VecGetArrayRead(yy[0], &yy0));
62: PetscCall(VecGetArrayRead(yy[1], &yy1));
63: PetscCall(VecGetArrayRead(yy[2], &yy2));
64: PetscCall(VecGetArrayRead(yy[3], &yy3));
65: fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
66: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
67: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
68: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
69: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
70: yy += 4;
71: z[0] = sum0;
72: z[1] = sum1;
73: z[2] = sum2;
74: z[3] = sum3;
75: z += 4;
76: i -= 4;
77: }
78: PetscCall(VecRestoreArrayRead(xin, &x));
79: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: #else
84: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
85: {
86: const PetscInt n = xin->map->n;
87: PetscInt i = nv, j = n, nv_rem = nv & 0x3, j_rem;
88: PetscScalar sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
89: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
90: const Vec *yy = (Vec *)yin;
92: PetscFunctionBegin;
93: PetscCall(VecGetArrayRead(xin, &xbase));
94: x = xbase;
95: switch (nv_rem) {
96: case 3:
97: PetscCall(VecGetArrayRead(yy[0], &yy0));
98: PetscCall(VecGetArrayRead(yy[1], &yy1));
99: PetscCall(VecGetArrayRead(yy[2], &yy2));
100: switch (j_rem = j & 0x3) {
101: case 3:
102: x2 = x[2];
103: sum0 += x2 * PetscConj(yy0[2]);
104: sum1 += x2 * PetscConj(yy1[2]);
105: sum2 += x2 * PetscConj(yy2[2]); /* fall through */
106: case 2:
107: x1 = x[1];
108: sum0 += x1 * PetscConj(yy0[1]);
109: sum1 += x1 * PetscConj(yy1[1]);
110: sum2 += x1 * PetscConj(yy2[1]); /* fall through */
111: case 1:
112: x0 = x[0];
113: sum0 += x0 * PetscConj(yy0[0]);
114: sum1 += x0 * PetscConj(yy1[0]);
115: sum2 += x0 * PetscConj(yy2[0]); /* fall through */
116: case 0:
117: x += j_rem;
118: yy0 += j_rem;
119: yy1 += j_rem;
120: yy2 += j_rem;
121: j -= j_rem;
122: break;
123: }
124: while (j > 0) {
125: x0 = x[0];
126: x1 = x[1];
127: x2 = x[2];
128: x3 = x[3];
129: x += 4;
131: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
132: yy0 += 4;
133: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
134: yy1 += 4;
135: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
136: yy2 += 4;
137: j -= 4;
138: }
139: z[0] = sum0;
140: z[1] = sum1;
141: z[2] = sum2;
142: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
143: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
144: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
145: break;
146: case 2:
147: PetscCall(VecGetArrayRead(yy[0], &yy0));
148: PetscCall(VecGetArrayRead(yy[1], &yy1));
149: switch (j_rem = j & 0x3) {
150: case 3:
151: x2 = x[2];
152: sum0 += x2 * PetscConj(yy0[2]);
153: sum1 += x2 * PetscConj(yy1[2]); /* fall through */
154: case 2:
155: x1 = x[1];
156: sum0 += x1 * PetscConj(yy0[1]);
157: sum1 += x1 * PetscConj(yy1[1]); /* fall through */
158: case 1:
159: x0 = x[0];
160: sum0 += x0 * PetscConj(yy0[0]);
161: sum1 += x0 * PetscConj(yy1[0]); /* fall through */
162: case 0:
163: x += j_rem;
164: yy0 += j_rem;
165: yy1 += j_rem;
166: j -= j_rem;
167: break;
168: }
169: while (j > 0) {
170: x0 = x[0];
171: x1 = x[1];
172: x2 = x[2];
173: x3 = x[3];
174: x += 4;
176: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
177: yy0 += 4;
178: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
179: yy1 += 4;
180: j -= 4;
181: }
182: z[0] = sum0;
183: z[1] = sum1;
185: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
186: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
187: break;
188: case 1:
189: PetscCall(VecGetArrayRead(yy[0], &yy0));
190: switch (j_rem = j & 0x3) {
191: case 3:
192: x2 = x[2];
193: sum0 += x2 * PetscConj(yy0[2]); /* fall through */
194: case 2:
195: x1 = x[1];
196: sum0 += x1 * PetscConj(yy0[1]); /* fall through */
197: case 1:
198: x0 = x[0];
199: sum0 += x0 * PetscConj(yy0[0]); /* fall through */
200: case 0:
201: x += j_rem;
202: yy0 += j_rem;
203: j -= j_rem;
204: break;
205: }
206: while (j > 0) {
207: sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
208: yy0 += 4;
209: j -= 4;
210: x += 4;
211: }
212: z[0] = sum0;
214: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
215: break;
216: case 0:
217: break;
218: }
219: z += nv_rem;
220: i -= nv_rem;
221: yy += nv_rem;
223: while (i > 0) {
224: sum0 = 0.;
225: sum1 = 0.;
226: sum2 = 0.;
227: sum3 = 0.;
228: PetscCall(VecGetArrayRead(yy[0], &yy0));
229: PetscCall(VecGetArrayRead(yy[1], &yy1));
230: PetscCall(VecGetArrayRead(yy[2], &yy2));
231: PetscCall(VecGetArrayRead(yy[3], &yy3));
233: j = n;
234: x = xbase;
235: switch (j_rem = j & 0x3) {
236: case 3:
237: x2 = x[2];
238: sum0 += x2 * PetscConj(yy0[2]);
239: sum1 += x2 * PetscConj(yy1[2]);
240: sum2 += x2 * PetscConj(yy2[2]);
241: sum3 += x2 * PetscConj(yy3[2]); /* fall through */
242: case 2:
243: x1 = x[1];
244: sum0 += x1 * PetscConj(yy0[1]);
245: sum1 += x1 * PetscConj(yy1[1]);
246: sum2 += x1 * PetscConj(yy2[1]);
247: sum3 += x1 * PetscConj(yy3[1]); /* fall through */
248: case 1:
249: x0 = x[0];
250: sum0 += x0 * PetscConj(yy0[0]);
251: sum1 += x0 * PetscConj(yy1[0]);
252: sum2 += x0 * PetscConj(yy2[0]);
253: sum3 += x0 * PetscConj(yy3[0]); /* fall through */
254: case 0:
255: x += j_rem;
256: yy0 += j_rem;
257: yy1 += j_rem;
258: yy2 += j_rem;
259: yy3 += j_rem;
260: j -= j_rem;
261: break;
262: }
263: while (j > 0) {
264: x0 = x[0];
265: x1 = x[1];
266: x2 = x[2];
267: x3 = x[3];
268: x += 4;
270: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
271: yy0 += 4;
272: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
273: yy1 += 4;
274: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
275: yy2 += 4;
276: sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
277: yy3 += 4;
278: j -= 4;
279: }
280: z[0] = sum0;
281: z[1] = sum1;
282: z[2] = sum2;
283: z[3] = sum3;
284: z += 4;
285: i -= 4;
286: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
287: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
288: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
289: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
290: yy += 4;
291: }
292: PetscCall(VecRestoreArrayRead(xin, &xbase));
293: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
294: PetscFunctionReturn(PETSC_SUCCESS);
295: }
296: #endif
298: /* ----------------------------------------------------------------------------*/
299: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
300: {
301: const PetscInt n = xin->map->n;
302: PetscInt i = nv, j = n, nv_rem = nv & 0x3, j_rem;
303: PetscScalar sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
304: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
305: const Vec *yy = (Vec *)yin;
307: PetscFunctionBegin;
308: PetscCall(VecGetArrayRead(xin, &xbase));
309: x = xbase;
311: switch (nv_rem) {
312: case 3:
313: PetscCall(VecGetArrayRead(yy[0], &yy0));
314: PetscCall(VecGetArrayRead(yy[1], &yy1));
315: PetscCall(VecGetArrayRead(yy[2], &yy2));
316: switch (j_rem = j & 0x3) {
317: case 3:
318: x2 = x[2];
319: sum0 += x2 * yy0[2];
320: sum1 += x2 * yy1[2];
321: sum2 += x2 * yy2[2]; /* fall through */
322: case 2:
323: x1 = x[1];
324: sum0 += x1 * yy0[1];
325: sum1 += x1 * yy1[1];
326: sum2 += x1 * yy2[1]; /* fall through */
327: case 1:
328: x0 = x[0];
329: sum0 += x0 * yy0[0];
330: sum1 += x0 * yy1[0];
331: sum2 += x0 * yy2[0]; /* fall through */
332: case 0:
333: x += j_rem;
334: yy0 += j_rem;
335: yy1 += j_rem;
336: yy2 += j_rem;
337: j -= j_rem;
338: break;
339: }
340: while (j > 0) {
341: x0 = x[0];
342: x1 = x[1];
343: x2 = x[2];
344: x3 = x[3];
345: x += 4;
347: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
348: yy0 += 4;
349: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
350: yy1 += 4;
351: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
352: yy2 += 4;
353: j -= 4;
354: }
355: z[0] = sum0;
356: z[1] = sum1;
357: z[2] = sum2;
358: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
359: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
360: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
361: break;
362: case 2:
363: PetscCall(VecGetArrayRead(yy[0], &yy0));
364: PetscCall(VecGetArrayRead(yy[1], &yy1));
365: switch (j_rem = j & 0x3) {
366: case 3:
367: x2 = x[2];
368: sum0 += x2 * yy0[2];
369: sum1 += x2 * yy1[2]; /* fall through */
370: case 2:
371: x1 = x[1];
372: sum0 += x1 * yy0[1];
373: sum1 += x1 * yy1[1]; /* fall through */
374: case 1:
375: x0 = x[0];
376: sum0 += x0 * yy0[0];
377: sum1 += x0 * yy1[0]; /* fall through */
378: case 0:
379: x += j_rem;
380: yy0 += j_rem;
381: yy1 += j_rem;
382: j -= j_rem;
383: break;
384: }
385: while (j > 0) {
386: x0 = x[0];
387: x1 = x[1];
388: x2 = x[2];
389: x3 = x[3];
390: x += 4;
392: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
393: yy0 += 4;
394: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
395: yy1 += 4;
396: j -= 4;
397: }
398: z[0] = sum0;
399: z[1] = sum1;
401: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
402: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
403: break;
404: case 1:
405: PetscCall(VecGetArrayRead(yy[0], &yy0));
406: switch (j_rem = j & 0x3) {
407: case 3:
408: x2 = x[2];
409: sum0 += x2 * yy0[2]; /* fall through */
410: case 2:
411: x1 = x[1];
412: sum0 += x1 * yy0[1]; /* fall through */
413: case 1:
414: x0 = x[0];
415: sum0 += x0 * yy0[0]; /* fall through */
416: case 0:
417: x += j_rem;
418: yy0 += j_rem;
419: j -= j_rem;
420: break;
421: }
422: while (j > 0) {
423: sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
424: yy0 += 4;
425: j -= 4;
426: x += 4;
427: }
428: z[0] = sum0;
430: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
431: break;
432: case 0:
433: break;
434: }
435: z += nv_rem;
436: i -= nv_rem;
437: yy += nv_rem;
439: while (i > 0) {
440: sum0 = 0.;
441: sum1 = 0.;
442: sum2 = 0.;
443: sum3 = 0.;
444: PetscCall(VecGetArrayRead(yy[0], &yy0));
445: PetscCall(VecGetArrayRead(yy[1], &yy1));
446: PetscCall(VecGetArrayRead(yy[2], &yy2));
447: PetscCall(VecGetArrayRead(yy[3], &yy3));
448: x = xbase;
450: j = n;
451: switch (j_rem = j & 0x3) {
452: case 3:
453: x2 = x[2];
454: sum0 += x2 * yy0[2];
455: sum1 += x2 * yy1[2];
456: sum2 += x2 * yy2[2];
457: sum3 += x2 * yy3[2]; /* fall through */
458: case 2:
459: x1 = x[1];
460: sum0 += x1 * yy0[1];
461: sum1 += x1 * yy1[1];
462: sum2 += x1 * yy2[1];
463: sum3 += x1 * yy3[1]; /* fall through */
464: case 1:
465: x0 = x[0];
466: sum0 += x0 * yy0[0];
467: sum1 += x0 * yy1[0];
468: sum2 += x0 * yy2[0];
469: sum3 += x0 * yy3[0]; /* fall through */
470: case 0:
471: x += j_rem;
472: yy0 += j_rem;
473: yy1 += j_rem;
474: yy2 += j_rem;
475: yy3 += j_rem;
476: j -= j_rem;
477: break;
478: }
479: while (j > 0) {
480: x0 = x[0];
481: x1 = x[1];
482: x2 = x[2];
483: x3 = x[3];
484: x += 4;
486: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
487: yy0 += 4;
488: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
489: yy1 += 4;
490: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
491: yy2 += 4;
492: sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
493: yy3 += 4;
494: j -= 4;
495: }
496: z[0] = sum0;
497: z[1] = sum1;
498: z[2] = sum2;
499: z[3] = sum3;
500: z += 4;
501: i -= 4;
502: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
503: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
504: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
505: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
506: yy += 4;
507: }
508: PetscCall(VecRestoreArrayRead(xin, &xbase));
509: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
510: PetscFunctionReturn(PETSC_SUCCESS);
511: }
513: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
514: {
515: const PetscInt n = xin->map->n;
516: PetscInt j = -1;
518: PetscFunctionBegin;
519: if (n) {
520: const PetscScalar *xx;
522: PetscCall(VecGetArrayRead(xin, &xx));
523: minmax = PetscRealPart(xx[(j = 0)]);
524: for (PetscInt i = 1; i < n; ++i) {
525: const PetscReal tmp = PetscRealPart(xx[i]);
527: if (cmp(tmp, minmax)) {
528: j = i;
529: minmax = tmp;
530: }
531: }
532: PetscCall(VecRestoreArrayRead(xin, &xx));
533: }
534: *z = minmax;
535: if (idx) *idx = j;
536: PetscFunctionReturn(PETSC_SUCCESS);
537: }
539: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
540: {
541: return l > r;
542: }
544: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
545: {
546: PetscFunctionBegin;
547: PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
548: PetscFunctionReturn(PETSC_SUCCESS);
549: }
551: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
552: {
553: return l < r;
554: }
556: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
557: {
558: PetscFunctionBegin;
559: PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
560: PetscFunctionReturn(PETSC_SUCCESS);
561: }
563: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
564: {
565: const PetscInt n = xin->map->n;
566: PetscScalar *xx;
568: PetscFunctionBegin;
569: PetscCall(VecGetArrayWrite(xin, &xx));
570: if (alpha == (PetscScalar)0.0) {
571: PetscCall(PetscArrayzero(xx, n));
572: } else {
573: for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
574: }
575: PetscCall(VecRestoreArrayWrite(xin, &xx));
576: PetscFunctionReturn(PETSC_SUCCESS);
577: }
579: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
580: {
581: const PetscInt j_rem = nv & 0x3, n = xin->map->n;
582: const PetscScalar *yptr[4];
583: PetscScalar *xx;
584: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
585: #pragma disjoint(*xx, **yptr, *aptr)
586: #endif
588: PetscFunctionBegin;
589: PetscCall(PetscLogFlops(nv * 2.0 * n));
590: PetscCall(VecGetArray(xin, &xx));
591: for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
592: switch (j_rem) {
593: case 3:
594: PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
595: break;
596: case 2:
597: PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
598: break;
599: case 1:
600: PetscKernelAXPY(xx, alpha[0], yptr[0], n);
601: default:
602: break;
603: }
604: for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
605: alpha += j_rem;
606: y += j_rem;
607: for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
608: for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
609: PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
610: for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
611: }
612: PetscCall(VecRestoreArray(xin, &xx));
613: PetscFunctionReturn(PETSC_SUCCESS);
614: }
616: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
618: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
619: {
620: PetscFunctionBegin;
621: if (alpha == (PetscScalar)0.0) {
622: PetscCall(VecCopy(xin, yin));
623: } else if (alpha == (PetscScalar)1.0) {
624: PetscCall(VecAXPY_Seq(yin, alpha, xin));
625: } else {
626: const PetscInt n = yin->map->n;
627: const PetscScalar *xx;
628: PetscScalar *yy;
630: PetscCall(VecGetArrayRead(xin, &xx));
631: PetscCall(VecGetArray(yin, &yy));
632: if (alpha == (PetscScalar)-1.0) {
633: for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
634: PetscCall(PetscLogFlops(n));
635: } else {
636: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
637: fortranaypx_(&n, &alpha, xx, yy);
638: #else
639: for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
640: #endif
641: PetscCall(PetscLogFlops(2 * n));
642: }
643: PetscCall(VecRestoreArrayRead(xin, &xx));
644: PetscCall(VecRestoreArray(yin, &yy));
645: }
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
649: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
650: /*
651: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
652: to be slower than a regular C loop. Hence,we do not include it.
653: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
654: */
656: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
657: {
658: const PetscInt n = win->map->n;
659: const PetscScalar *yy, *xx;
660: PetscScalar *ww;
662: PetscFunctionBegin;
663: PetscCall(VecGetArrayRead(xin, &xx));
664: PetscCall(VecGetArrayRead(yin, &yy));
665: PetscCall(VecGetArray(win, &ww));
666: if (alpha == (PetscScalar)1.0) {
667: PetscCall(PetscLogFlops(n));
668: /* could call BLAS axpy after call to memcopy, but may be slower */
669: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
670: } else if (alpha == (PetscScalar)-1.0) {
671: PetscCall(PetscLogFlops(n));
672: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
673: } else if (alpha == (PetscScalar)0.0) {
674: PetscCall(PetscArraycpy(ww, yy, n));
675: } else {
676: PetscCall(PetscLogFlops(2.0 * n));
677: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
678: fortranwaxpy_(&n, &alpha, xx, yy, ww);
679: #else
680: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
681: #endif
682: }
683: PetscCall(VecRestoreArrayRead(xin, &xx));
684: PetscCall(VecRestoreArrayRead(yin, &yy));
685: PetscCall(VecRestoreArray(win, &ww));
686: PetscFunctionReturn(PETSC_SUCCESS);
687: }
689: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
690: {
691: const PetscInt n = xin->map->n;
692: const PetscScalar *xx, *yy;
693: PetscReal m = 0.0;
695: PetscFunctionBegin;
696: PetscCall(VecGetArrayRead(xin, &xx));
697: PetscCall(VecGetArrayRead(yin, &yy));
698: for (PetscInt i = 0; i < n; ++i) {
699: const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);
701: // use a separate value to not re-evaluate side-effects
702: m = PetscMax(v, m);
703: }
704: PetscCall(VecRestoreArrayRead(xin, &xx));
705: PetscCall(VecRestoreArrayRead(yin, &yy));
706: PetscCall(PetscLogFlops(n));
707: *max = m;
708: PetscFunctionReturn(PETSC_SUCCESS);
709: }
711: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
712: {
713: Vec_Seq *v = (Vec_Seq *)vin->data;
715: PetscFunctionBegin;
716: PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
717: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
718: v->array = (PetscScalar *)a;
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
723: {
724: Vec_Seq *v = (Vec_Seq *)vin->data;
726: PetscFunctionBegin;
727: PetscCall(PetscFree(v->array_allocated));
728: v->array_allocated = v->array = (PetscScalar *)a;
729: PetscFunctionReturn(PETSC_SUCCESS);
730: }