Actual source code: dvec2.c


  2: /*
  3:    Defines some vector operation functions that are shared by
  4:    sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc/private/kernels/petscaxpy.h>

  9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 11: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 12: {
 13:   const PetscInt     n = xin->map->n;
 14:   PetscInt           i = nv, nv_rem = nv & 0x3;
 15:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3;
 16:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
 17:   Vec               *yy = (Vec *)yin;

 19:   PetscFunctionBegin;
 20:   PetscCall(VecGetArrayRead(xin, &x));
 21:   switch (nv_rem) {
 22:   case 3:
 23:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 24:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 25:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 26:     fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
 27:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 28:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 29:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 30:     z[0] = sum0;
 31:     z[1] = sum1;
 32:     z[2] = sum2;
 33:     break;
 34:   case 2:
 35:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 36:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 37:     fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
 38:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 39:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 40:     z[0] = sum0;
 41:     z[1] = sum1;
 42:     break;
 43:   case 1:
 44:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 45:     fortranmdot1_(x, yy0, &n, &sum0);
 46:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 47:     z[0] = sum0;
 48:     break;
 49:   case 0:
 50:     break;
 51:   }
 52:   z += nv_rem;
 53:   i -= nv_rem;
 54:   yy += nv_rem;

 56:   while (i > 0) {
 57:     sum0 = 0.;
 58:     sum1 = 0.;
 59:     sum2 = 0.;
 60:     sum3 = 0.;
 61:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 62:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 63:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 64:     PetscCall(VecGetArrayRead(yy[3], &yy3));
 65:     fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
 66:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 67:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 68:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 69:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
 70:     yy += 4;
 71:     z[0] = sum0;
 72:     z[1] = sum1;
 73:     z[2] = sum2;
 74:     z[3] = sum3;
 75:     z += 4;
 76:     i -= 4;
 77:   }
 78:   PetscCall(VecRestoreArrayRead(xin, &x));
 79:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: #else
 84: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 85: {
 86:   const PetscInt     n = xin->map->n;
 87:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
 88:   PetscScalar        sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
 89:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
 90:   const Vec         *yy = (Vec *)yin;

 92:   PetscFunctionBegin;
 93:   PetscCall(VecGetArrayRead(xin, &xbase));
 94:   x = xbase;
 95:   switch (nv_rem) {
 96:   case 3:
 97:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 98:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 99:     PetscCall(VecGetArrayRead(yy[2], &yy2));
100:     switch (j_rem = j & 0x3) {
101:     case 3:
102:       x2 = x[2];
103:       sum0 += x2 * PetscConj(yy0[2]);
104:       sum1 += x2 * PetscConj(yy1[2]);
105:       sum2 += x2 * PetscConj(yy2[2]); /* fall through */
106:     case 2:
107:       x1 = x[1];
108:       sum0 += x1 * PetscConj(yy0[1]);
109:       sum1 += x1 * PetscConj(yy1[1]);
110:       sum2 += x1 * PetscConj(yy2[1]); /* fall through */
111:     case 1:
112:       x0 = x[0];
113:       sum0 += x0 * PetscConj(yy0[0]);
114:       sum1 += x0 * PetscConj(yy1[0]);
115:       sum2 += x0 * PetscConj(yy2[0]); /* fall through */
116:     case 0:
117:       x += j_rem;
118:       yy0 += j_rem;
119:       yy1 += j_rem;
120:       yy2 += j_rem;
121:       j -= j_rem;
122:       break;
123:     }
124:     while (j > 0) {
125:       x0 = x[0];
126:       x1 = x[1];
127:       x2 = x[2];
128:       x3 = x[3];
129:       x += 4;

131:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
132:       yy0 += 4;
133:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
134:       yy1 += 4;
135:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
136:       yy2 += 4;
137:       j -= 4;
138:     }
139:     z[0] = sum0;
140:     z[1] = sum1;
141:     z[2] = sum2;
142:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
143:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
144:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
145:     break;
146:   case 2:
147:     PetscCall(VecGetArrayRead(yy[0], &yy0));
148:     PetscCall(VecGetArrayRead(yy[1], &yy1));
149:     switch (j_rem = j & 0x3) {
150:     case 3:
151:       x2 = x[2];
152:       sum0 += x2 * PetscConj(yy0[2]);
153:       sum1 += x2 * PetscConj(yy1[2]); /* fall through */
154:     case 2:
155:       x1 = x[1];
156:       sum0 += x1 * PetscConj(yy0[1]);
157:       sum1 += x1 * PetscConj(yy1[1]); /* fall through */
158:     case 1:
159:       x0 = x[0];
160:       sum0 += x0 * PetscConj(yy0[0]);
161:       sum1 += x0 * PetscConj(yy1[0]); /* fall through */
162:     case 0:
163:       x += j_rem;
164:       yy0 += j_rem;
165:       yy1 += j_rem;
166:       j -= j_rem;
167:       break;
168:     }
169:     while (j > 0) {
170:       x0 = x[0];
171:       x1 = x[1];
172:       x2 = x[2];
173:       x3 = x[3];
174:       x += 4;

176:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
177:       yy0 += 4;
178:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
179:       yy1 += 4;
180:       j -= 4;
181:     }
182:     z[0] = sum0;
183:     z[1] = sum1;

185:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
186:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
187:     break;
188:   case 1:
189:     PetscCall(VecGetArrayRead(yy[0], &yy0));
190:     switch (j_rem = j & 0x3) {
191:     case 3:
192:       x2 = x[2];
193:       sum0 += x2 * PetscConj(yy0[2]); /* fall through */
194:     case 2:
195:       x1 = x[1];
196:       sum0 += x1 * PetscConj(yy0[1]); /* fall through */
197:     case 1:
198:       x0 = x[0];
199:       sum0 += x0 * PetscConj(yy0[0]); /* fall through */
200:     case 0:
201:       x += j_rem;
202:       yy0 += j_rem;
203:       j -= j_rem;
204:       break;
205:     }
206:     while (j > 0) {
207:       sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
208:       yy0 += 4;
209:       j -= 4;
210:       x += 4;
211:     }
212:     z[0] = sum0;

214:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
215:     break;
216:   case 0:
217:     break;
218:   }
219:   z += nv_rem;
220:   i -= nv_rem;
221:   yy += nv_rem;

223:   while (i > 0) {
224:     sum0 = 0.;
225:     sum1 = 0.;
226:     sum2 = 0.;
227:     sum3 = 0.;
228:     PetscCall(VecGetArrayRead(yy[0], &yy0));
229:     PetscCall(VecGetArrayRead(yy[1], &yy1));
230:     PetscCall(VecGetArrayRead(yy[2], &yy2));
231:     PetscCall(VecGetArrayRead(yy[3], &yy3));

233:     j = n;
234:     x = xbase;
235:     switch (j_rem = j & 0x3) {
236:     case 3:
237:       x2 = x[2];
238:       sum0 += x2 * PetscConj(yy0[2]);
239:       sum1 += x2 * PetscConj(yy1[2]);
240:       sum2 += x2 * PetscConj(yy2[2]);
241:       sum3 += x2 * PetscConj(yy3[2]); /* fall through */
242:     case 2:
243:       x1 = x[1];
244:       sum0 += x1 * PetscConj(yy0[1]);
245:       sum1 += x1 * PetscConj(yy1[1]);
246:       sum2 += x1 * PetscConj(yy2[1]);
247:       sum3 += x1 * PetscConj(yy3[1]); /* fall through */
248:     case 1:
249:       x0 = x[0];
250:       sum0 += x0 * PetscConj(yy0[0]);
251:       sum1 += x0 * PetscConj(yy1[0]);
252:       sum2 += x0 * PetscConj(yy2[0]);
253:       sum3 += x0 * PetscConj(yy3[0]); /* fall through */
254:     case 0:
255:       x += j_rem;
256:       yy0 += j_rem;
257:       yy1 += j_rem;
258:       yy2 += j_rem;
259:       yy3 += j_rem;
260:       j -= j_rem;
261:       break;
262:     }
263:     while (j > 0) {
264:       x0 = x[0];
265:       x1 = x[1];
266:       x2 = x[2];
267:       x3 = x[3];
268:       x += 4;

270:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
271:       yy0 += 4;
272:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
273:       yy1 += 4;
274:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
275:       yy2 += 4;
276:       sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
277:       yy3 += 4;
278:       j -= 4;
279:     }
280:     z[0] = sum0;
281:     z[1] = sum1;
282:     z[2] = sum2;
283:     z[3] = sum3;
284:     z += 4;
285:     i -= 4;
286:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
287:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
288:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
289:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
290:     yy += 4;
291:   }
292:   PetscCall(VecRestoreArrayRead(xin, &xbase));
293:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
294:   PetscFunctionReturn(PETSC_SUCCESS);
295: }
296: #endif

298: /* ----------------------------------------------------------------------------*/
299: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
300: {
301:   const PetscInt     n = xin->map->n;
302:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
303:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
304:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
305:   const Vec         *yy = (Vec *)yin;

307:   PetscFunctionBegin;
308:   PetscCall(VecGetArrayRead(xin, &xbase));
309:   x = xbase;

311:   switch (nv_rem) {
312:   case 3:
313:     PetscCall(VecGetArrayRead(yy[0], &yy0));
314:     PetscCall(VecGetArrayRead(yy[1], &yy1));
315:     PetscCall(VecGetArrayRead(yy[2], &yy2));
316:     switch (j_rem = j & 0x3) {
317:     case 3:
318:       x2 = x[2];
319:       sum0 += x2 * yy0[2];
320:       sum1 += x2 * yy1[2];
321:       sum2 += x2 * yy2[2]; /* fall through */
322:     case 2:
323:       x1 = x[1];
324:       sum0 += x1 * yy0[1];
325:       sum1 += x1 * yy1[1];
326:       sum2 += x1 * yy2[1]; /* fall through */
327:     case 1:
328:       x0 = x[0];
329:       sum0 += x0 * yy0[0];
330:       sum1 += x0 * yy1[0];
331:       sum2 += x0 * yy2[0]; /* fall through */
332:     case 0:
333:       x += j_rem;
334:       yy0 += j_rem;
335:       yy1 += j_rem;
336:       yy2 += j_rem;
337:       j -= j_rem;
338:       break;
339:     }
340:     while (j > 0) {
341:       x0 = x[0];
342:       x1 = x[1];
343:       x2 = x[2];
344:       x3 = x[3];
345:       x += 4;

347:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
348:       yy0 += 4;
349:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
350:       yy1 += 4;
351:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
352:       yy2 += 4;
353:       j -= 4;
354:     }
355:     z[0] = sum0;
356:     z[1] = sum1;
357:     z[2] = sum2;
358:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
359:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
360:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
361:     break;
362:   case 2:
363:     PetscCall(VecGetArrayRead(yy[0], &yy0));
364:     PetscCall(VecGetArrayRead(yy[1], &yy1));
365:     switch (j_rem = j & 0x3) {
366:     case 3:
367:       x2 = x[2];
368:       sum0 += x2 * yy0[2];
369:       sum1 += x2 * yy1[2]; /* fall through */
370:     case 2:
371:       x1 = x[1];
372:       sum0 += x1 * yy0[1];
373:       sum1 += x1 * yy1[1]; /* fall through */
374:     case 1:
375:       x0 = x[0];
376:       sum0 += x0 * yy0[0];
377:       sum1 += x0 * yy1[0]; /* fall through */
378:     case 0:
379:       x += j_rem;
380:       yy0 += j_rem;
381:       yy1 += j_rem;
382:       j -= j_rem;
383:       break;
384:     }
385:     while (j > 0) {
386:       x0 = x[0];
387:       x1 = x[1];
388:       x2 = x[2];
389:       x3 = x[3];
390:       x += 4;

392:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
393:       yy0 += 4;
394:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
395:       yy1 += 4;
396:       j -= 4;
397:     }
398:     z[0] = sum0;
399:     z[1] = sum1;

401:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
402:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
403:     break;
404:   case 1:
405:     PetscCall(VecGetArrayRead(yy[0], &yy0));
406:     switch (j_rem = j & 0x3) {
407:     case 3:
408:       x2 = x[2];
409:       sum0 += x2 * yy0[2]; /* fall through */
410:     case 2:
411:       x1 = x[1];
412:       sum0 += x1 * yy0[1]; /* fall through */
413:     case 1:
414:       x0 = x[0];
415:       sum0 += x0 * yy0[0]; /* fall through */
416:     case 0:
417:       x += j_rem;
418:       yy0 += j_rem;
419:       j -= j_rem;
420:       break;
421:     }
422:     while (j > 0) {
423:       sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
424:       yy0 += 4;
425:       j -= 4;
426:       x += 4;
427:     }
428:     z[0] = sum0;

430:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
431:     break;
432:   case 0:
433:     break;
434:   }
435:   z += nv_rem;
436:   i -= nv_rem;
437:   yy += nv_rem;

439:   while (i > 0) {
440:     sum0 = 0.;
441:     sum1 = 0.;
442:     sum2 = 0.;
443:     sum3 = 0.;
444:     PetscCall(VecGetArrayRead(yy[0], &yy0));
445:     PetscCall(VecGetArrayRead(yy[1], &yy1));
446:     PetscCall(VecGetArrayRead(yy[2], &yy2));
447:     PetscCall(VecGetArrayRead(yy[3], &yy3));
448:     x = xbase;

450:     j = n;
451:     switch (j_rem = j & 0x3) {
452:     case 3:
453:       x2 = x[2];
454:       sum0 += x2 * yy0[2];
455:       sum1 += x2 * yy1[2];
456:       sum2 += x2 * yy2[2];
457:       sum3 += x2 * yy3[2]; /* fall through */
458:     case 2:
459:       x1 = x[1];
460:       sum0 += x1 * yy0[1];
461:       sum1 += x1 * yy1[1];
462:       sum2 += x1 * yy2[1];
463:       sum3 += x1 * yy3[1]; /* fall through */
464:     case 1:
465:       x0 = x[0];
466:       sum0 += x0 * yy0[0];
467:       sum1 += x0 * yy1[0];
468:       sum2 += x0 * yy2[0];
469:       sum3 += x0 * yy3[0]; /* fall through */
470:     case 0:
471:       x += j_rem;
472:       yy0 += j_rem;
473:       yy1 += j_rem;
474:       yy2 += j_rem;
475:       yy3 += j_rem;
476:       j -= j_rem;
477:       break;
478:     }
479:     while (j > 0) {
480:       x0 = x[0];
481:       x1 = x[1];
482:       x2 = x[2];
483:       x3 = x[3];
484:       x += 4;

486:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
487:       yy0 += 4;
488:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
489:       yy1 += 4;
490:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
491:       yy2 += 4;
492:       sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
493:       yy3 += 4;
494:       j -= 4;
495:     }
496:     z[0] = sum0;
497:     z[1] = sum1;
498:     z[2] = sum2;
499:     z[3] = sum3;
500:     z += 4;
501:     i -= 4;
502:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
503:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
504:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
505:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
506:     yy += 4;
507:   }
508:   PetscCall(VecRestoreArrayRead(xin, &xbase));
509:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
510:   PetscFunctionReturn(PETSC_SUCCESS);
511: }

513: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
514: {
515:   const PetscInt n = xin->map->n;
516:   PetscInt       j = -1;

518:   PetscFunctionBegin;
519:   if (n) {
520:     const PetscScalar *xx;

522:     PetscCall(VecGetArrayRead(xin, &xx));
523:     minmax = PetscRealPart(xx[(j = 0)]);
524:     for (PetscInt i = 1; i < n; ++i) {
525:       const PetscReal tmp = PetscRealPart(xx[i]);

527:       if (cmp(tmp, minmax)) {
528:         j      = i;
529:         minmax = tmp;
530:       }
531:     }
532:     PetscCall(VecRestoreArrayRead(xin, &xx));
533:   }
534:   *z = minmax;
535:   if (idx) *idx = j;
536:   PetscFunctionReturn(PETSC_SUCCESS);
537: }

539: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
540: {
541:   return l > r;
542: }

544: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
545: {
546:   PetscFunctionBegin;
547:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
548:   PetscFunctionReturn(PETSC_SUCCESS);
549: }

551: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
552: {
553:   return l < r;
554: }

556: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
557: {
558:   PetscFunctionBegin;
559:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
560:   PetscFunctionReturn(PETSC_SUCCESS);
561: }

563: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
564: {
565:   const PetscInt n = xin->map->n;
566:   PetscScalar   *xx;

568:   PetscFunctionBegin;
569:   PetscCall(VecGetArrayWrite(xin, &xx));
570:   if (alpha == (PetscScalar)0.0) {
571:     PetscCall(PetscArrayzero(xx, n));
572:   } else {
573:     for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
574:   }
575:   PetscCall(VecRestoreArrayWrite(xin, &xx));
576:   PetscFunctionReturn(PETSC_SUCCESS);
577: }

579: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
580: {
581:   const PetscInt     j_rem = nv & 0x3, n = xin->map->n;
582:   const PetscScalar *yptr[4];
583:   PetscScalar       *xx;
584: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
585:   #pragma disjoint(*xx, **yptr, *aptr)
586: #endif

588:   PetscFunctionBegin;
589:   PetscCall(PetscLogFlops(nv * 2.0 * n));
590:   PetscCall(VecGetArray(xin, &xx));
591:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
592:   switch (j_rem) {
593:   case 3:
594:     PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
595:     break;
596:   case 2:
597:     PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
598:     break;
599:   case 1:
600:     PetscKernelAXPY(xx, alpha[0], yptr[0], n);
601:   default:
602:     break;
603:   }
604:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
605:   alpha += j_rem;
606:   y += j_rem;
607:   for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
608:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
609:     PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
610:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
611:   }
612:   PetscCall(VecRestoreArray(xin, &xx));
613:   PetscFunctionReturn(PETSC_SUCCESS);
614: }

616: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

618: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
619: {
620:   PetscFunctionBegin;
621:   if (alpha == (PetscScalar)0.0) {
622:     PetscCall(VecCopy(xin, yin));
623:   } else if (alpha == (PetscScalar)1.0) {
624:     PetscCall(VecAXPY_Seq(yin, alpha, xin));
625:   } else {
626:     const PetscInt     n = yin->map->n;
627:     const PetscScalar *xx;
628:     PetscScalar       *yy;

630:     PetscCall(VecGetArrayRead(xin, &xx));
631:     PetscCall(VecGetArray(yin, &yy));
632:     if (alpha == (PetscScalar)-1.0) {
633:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
634:       PetscCall(PetscLogFlops(n));
635:     } else {
636: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
637:       fortranaypx_(&n, &alpha, xx, yy);
638: #else
639:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
640: #endif
641:       PetscCall(PetscLogFlops(2 * n));
642:     }
643:     PetscCall(VecRestoreArrayRead(xin, &xx));
644:     PetscCall(VecRestoreArray(yin, &yy));
645:   }
646:   PetscFunctionReturn(PETSC_SUCCESS);
647: }

649: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
650: /*
651:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
652:    to be slower than a regular C loop.  Hence,we do not include it.
653:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
654: */

656: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
657: {
658:   const PetscInt     n = win->map->n;
659:   const PetscScalar *yy, *xx;
660:   PetscScalar       *ww;

662:   PetscFunctionBegin;
663:   PetscCall(VecGetArrayRead(xin, &xx));
664:   PetscCall(VecGetArrayRead(yin, &yy));
665:   PetscCall(VecGetArray(win, &ww));
666:   if (alpha == (PetscScalar)1.0) {
667:     PetscCall(PetscLogFlops(n));
668:     /* could call BLAS axpy after call to memcopy, but may be slower */
669:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
670:   } else if (alpha == (PetscScalar)-1.0) {
671:     PetscCall(PetscLogFlops(n));
672:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
673:   } else if (alpha == (PetscScalar)0.0) {
674:     PetscCall(PetscArraycpy(ww, yy, n));
675:   } else {
676:     PetscCall(PetscLogFlops(2.0 * n));
677: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
678:     fortranwaxpy_(&n, &alpha, xx, yy, ww);
679: #else
680:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
681: #endif
682:   }
683:   PetscCall(VecRestoreArrayRead(xin, &xx));
684:   PetscCall(VecRestoreArrayRead(yin, &yy));
685:   PetscCall(VecRestoreArray(win, &ww));
686:   PetscFunctionReturn(PETSC_SUCCESS);
687: }

689: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
690: {
691:   const PetscInt     n = xin->map->n;
692:   const PetscScalar *xx, *yy;
693:   PetscReal          m = 0.0;

695:   PetscFunctionBegin;
696:   PetscCall(VecGetArrayRead(xin, &xx));
697:   PetscCall(VecGetArrayRead(yin, &yy));
698:   for (PetscInt i = 0; i < n; ++i) {
699:     const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);

701:     // use a separate value to not re-evaluate side-effects
702:     m = PetscMax(v, m);
703:   }
704:   PetscCall(VecRestoreArrayRead(xin, &xx));
705:   PetscCall(VecRestoreArrayRead(yin, &yy));
706:   PetscCall(PetscLogFlops(n));
707:   *max = m;
708:   PetscFunctionReturn(PETSC_SUCCESS);
709: }

711: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
712: {
713:   Vec_Seq *v = (Vec_Seq *)vin->data;

715:   PetscFunctionBegin;
716:   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
717:   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
718:   v->array         = (PetscScalar *)a;
719:   PetscFunctionReturn(PETSC_SUCCESS);
720: }

722: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
723: {
724:   Vec_Seq *v = (Vec_Seq *)vin->data;

726:   PetscFunctionBegin;
727:   PetscCall(PetscFree(v->array_allocated));
728:   v->array_allocated = v->array = (PetscScalar *)a;
729:   PetscFunctionReturn(PETSC_SUCCESS);
730: }