Actual source code: dvec2.c

  1: /*
  2:    Defines some vector operation functions that are shared by
  3:    sequential and parallel vectors.
  4: */
  5: #include <../src/vec/vec/impls/dvecimpl.h>
  6: #include <petsc/private/kernels/petscaxpy.h>

  8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  9: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 10: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 11: {
 12:   const PetscInt     n = xin->map->n;
 13:   PetscInt           i = nv, nv_rem = nv & 0x3;
 14:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3;
 15:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
 16:   Vec               *yy = (Vec *)yin;

 18:   PetscFunctionBegin;
 19:   PetscCall(VecGetArrayRead(xin, &x));
 20:   switch (nv_rem) {
 21:   case 3:
 22:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 23:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 24:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 25:     fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
 26:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 27:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 28:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 29:     z[0] = sum0;
 30:     z[1] = sum1;
 31:     z[2] = sum2;
 32:     break;
 33:   case 2:
 34:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 35:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 36:     fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
 37:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 38:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 39:     z[0] = sum0;
 40:     z[1] = sum1;
 41:     break;
 42:   case 1:
 43:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 44:     fortranmdot1_(x, yy0, &n, &sum0);
 45:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 46:     z[0] = sum0;
 47:     break;
 48:   case 0:
 49:     break;
 50:   }
 51:   z += nv_rem;
 52:   i -= nv_rem;
 53:   yy += nv_rem;

 55:   while (i > 0) {
 56:     sum0 = 0.;
 57:     sum1 = 0.;
 58:     sum2 = 0.;
 59:     sum3 = 0.;
 60:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 61:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 62:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 63:     PetscCall(VecGetArrayRead(yy[3], &yy3));
 64:     fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
 65:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 66:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 67:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 68:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
 69:     yy += 4;
 70:     z[0] = sum0;
 71:     z[1] = sum1;
 72:     z[2] = sum2;
 73:     z[3] = sum3;
 74:     z += 4;
 75:     i -= 4;
 76:   }
 77:   PetscCall(VecRestoreArrayRead(xin, &x));
 78:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: #else
 83: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 84: {
 85:   const PetscInt     n = xin->map->n;
 86:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
 87:   PetscScalar        sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
 88:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
 89:   const Vec         *yy = (Vec *)yin;

 91:   PetscFunctionBegin;
 92:   if (n == 0) {
 93:     PetscCall(PetscArrayzero(z, nv));
 94:     PetscFunctionReturn(PETSC_SUCCESS);
 95:   }
 96:   PetscCall(VecGetArrayRead(xin, &xbase));
 97:   x = xbase;
 98:   switch (nv_rem) {
 99:   case 3:
100:     PetscCall(VecGetArrayRead(yy[0], &yy0));
101:     PetscCall(VecGetArrayRead(yy[1], &yy1));
102:     PetscCall(VecGetArrayRead(yy[2], &yy2));
103:     switch (j_rem = j & 0x3) {
104:     case 3:
105:       x2 = x[2];
106:       sum0 += x2 * PetscConj(yy0[2]);
107:       sum1 += x2 * PetscConj(yy1[2]);
108:       sum2 += x2 * PetscConj(yy2[2]); /* fall through */
109:     case 2:
110:       x1 = x[1];
111:       sum0 += x1 * PetscConj(yy0[1]);
112:       sum1 += x1 * PetscConj(yy1[1]);
113:       sum2 += x1 * PetscConj(yy2[1]); /* fall through */
114:     case 1:
115:       x0 = x[0];
116:       sum0 += x0 * PetscConj(yy0[0]);
117:       sum1 += x0 * PetscConj(yy1[0]);
118:       sum2 += x0 * PetscConj(yy2[0]); /* fall through */
119:     case 0:
120:       x += j_rem;
121:       yy0 += j_rem;
122:       yy1 += j_rem;
123:       yy2 += j_rem;
124:       j -= j_rem;
125:       break;
126:     }
127:     while (j > 0) {
128:       x0 = x[0];
129:       x1 = x[1];
130:       x2 = x[2];
131:       x3 = x[3];
132:       x += 4;

134:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
135:       yy0 += 4;
136:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
137:       yy1 += 4;
138:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
139:       yy2 += 4;
140:       j -= 4;
141:     }
142:     z[0] = sum0;
143:     z[1] = sum1;
144:     z[2] = sum2;
145:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
146:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
147:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
148:     break;
149:   case 2:
150:     PetscCall(VecGetArrayRead(yy[0], &yy0));
151:     PetscCall(VecGetArrayRead(yy[1], &yy1));
152:     switch (j_rem = j & 0x3) {
153:     case 3:
154:       x2 = x[2];
155:       sum0 += x2 * PetscConj(yy0[2]);
156:       sum1 += x2 * PetscConj(yy1[2]); /* fall through */
157:     case 2:
158:       x1 = x[1];
159:       sum0 += x1 * PetscConj(yy0[1]);
160:       sum1 += x1 * PetscConj(yy1[1]); /* fall through */
161:     case 1:
162:       x0 = x[0];
163:       sum0 += x0 * PetscConj(yy0[0]);
164:       sum1 += x0 * PetscConj(yy1[0]); /* fall through */
165:     case 0:
166:       x += j_rem;
167:       yy0 += j_rem;
168:       yy1 += j_rem;
169:       j -= j_rem;
170:       break;
171:     }
172:     while (j > 0) {
173:       x0 = x[0];
174:       x1 = x[1];
175:       x2 = x[2];
176:       x3 = x[3];
177:       x += 4;

179:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
180:       yy0 += 4;
181:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
182:       yy1 += 4;
183:       j -= 4;
184:     }
185:     z[0] = sum0;
186:     z[1] = sum1;

188:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
189:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
190:     break;
191:   case 1:
192:     PetscCall(VecGetArrayRead(yy[0], &yy0));
193:     switch (j_rem = j & 0x3) {
194:     case 3:
195:       x2 = x[2];
196:       sum0 += x2 * PetscConj(yy0[2]); /* fall through */
197:     case 2:
198:       x1 = x[1];
199:       sum0 += x1 * PetscConj(yy0[1]); /* fall through */
200:     case 1:
201:       x0 = x[0];
202:       sum0 += x0 * PetscConj(yy0[0]); /* fall through */
203:     case 0:
204:       x += j_rem;
205:       yy0 += j_rem;
206:       j -= j_rem;
207:       break;
208:     }
209:     while (j > 0) {
210:       sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
211:       yy0 += 4;
212:       j -= 4;
213:       x += 4;
214:     }
215:     z[0] = sum0;

217:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
218:     break;
219:   case 0:
220:     break;
221:   }
222:   z += nv_rem;
223:   i -= nv_rem;
224:   yy += nv_rem;

226:   while (i > 0) {
227:     sum0 = 0.;
228:     sum1 = 0.;
229:     sum2 = 0.;
230:     sum3 = 0.;
231:     PetscCall(VecGetArrayRead(yy[0], &yy0));
232:     PetscCall(VecGetArrayRead(yy[1], &yy1));
233:     PetscCall(VecGetArrayRead(yy[2], &yy2));
234:     PetscCall(VecGetArrayRead(yy[3], &yy3));

236:     j = n;
237:     x = xbase;
238:     switch (j_rem = j & 0x3) {
239:     case 3:
240:       x2 = x[2];
241:       sum0 += x2 * PetscConj(yy0[2]);
242:       sum1 += x2 * PetscConj(yy1[2]);
243:       sum2 += x2 * PetscConj(yy2[2]);
244:       sum3 += x2 * PetscConj(yy3[2]); /* fall through */
245:     case 2:
246:       x1 = x[1];
247:       sum0 += x1 * PetscConj(yy0[1]);
248:       sum1 += x1 * PetscConj(yy1[1]);
249:       sum2 += x1 * PetscConj(yy2[1]);
250:       sum3 += x1 * PetscConj(yy3[1]); /* fall through */
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0 * PetscConj(yy0[0]);
254:       sum1 += x0 * PetscConj(yy1[0]);
255:       sum2 += x0 * PetscConj(yy2[0]);
256:       sum3 += x0 * PetscConj(yy3[0]); /* fall through */
257:     case 0:
258:       x += j_rem;
259:       yy0 += j_rem;
260:       yy1 += j_rem;
261:       yy2 += j_rem;
262:       yy3 += j_rem;
263:       j -= j_rem;
264:       break;
265:     }
266:     while (j > 0) {
267:       x0 = x[0];
268:       x1 = x[1];
269:       x2 = x[2];
270:       x3 = x[3];
271:       x += 4;

273:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
274:       yy0 += 4;
275:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
276:       yy1 += 4;
277:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
278:       yy2 += 4;
279:       sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
280:       yy3 += 4;
281:       j -= 4;
282:     }
283:     z[0] = sum0;
284:     z[1] = sum1;
285:     z[2] = sum2;
286:     z[3] = sum3;
287:     z += 4;
288:     i -= 4;
289:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
290:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
291:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
292:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
293:     yy += 4;
294:   }
295:   PetscCall(VecRestoreArrayRead(xin, &xbase));
296:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }
299: #endif

301: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
302: {
303:   const PetscInt     n = xin->map->n;
304:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
305:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
306:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
307:   const Vec         *yy = (Vec *)yin;

309:   PetscFunctionBegin;
310:   PetscCall(VecGetArrayRead(xin, &xbase));
311:   x = xbase;

313:   switch (nv_rem) {
314:   case 3:
315:     PetscCall(VecGetArrayRead(yy[0], &yy0));
316:     PetscCall(VecGetArrayRead(yy[1], &yy1));
317:     PetscCall(VecGetArrayRead(yy[2], &yy2));
318:     switch (j_rem = j & 0x3) {
319:     case 3:
320:       x2 = x[2];
321:       sum0 += x2 * yy0[2];
322:       sum1 += x2 * yy1[2];
323:       sum2 += x2 * yy2[2]; /* fall through */
324:     case 2:
325:       x1 = x[1];
326:       sum0 += x1 * yy0[1];
327:       sum1 += x1 * yy1[1];
328:       sum2 += x1 * yy2[1]; /* fall through */
329:     case 1:
330:       x0 = x[0];
331:       sum0 += x0 * yy0[0];
332:       sum1 += x0 * yy1[0];
333:       sum2 += x0 * yy2[0]; /* fall through */
334:     case 0:
335:       x += j_rem;
336:       yy0 += j_rem;
337:       yy1 += j_rem;
338:       yy2 += j_rem;
339:       j -= j_rem;
340:       break;
341:     }
342:     while (j > 0) {
343:       x0 = x[0];
344:       x1 = x[1];
345:       x2 = x[2];
346:       x3 = x[3];
347:       x += 4;

349:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
350:       yy0 += 4;
351:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
352:       yy1 += 4;
353:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
354:       yy2 += 4;
355:       j -= 4;
356:     }
357:     z[0] = sum0;
358:     z[1] = sum1;
359:     z[2] = sum2;
360:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
361:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
362:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
363:     break;
364:   case 2:
365:     PetscCall(VecGetArrayRead(yy[0], &yy0));
366:     PetscCall(VecGetArrayRead(yy[1], &yy1));
367:     switch (j_rem = j & 0x3) {
368:     case 3:
369:       x2 = x[2];
370:       sum0 += x2 * yy0[2];
371:       sum1 += x2 * yy1[2]; /* fall through */
372:     case 2:
373:       x1 = x[1];
374:       sum0 += x1 * yy0[1];
375:       sum1 += x1 * yy1[1]; /* fall through */
376:     case 1:
377:       x0 = x[0];
378:       sum0 += x0 * yy0[0];
379:       sum1 += x0 * yy1[0]; /* fall through */
380:     case 0:
381:       x += j_rem;
382:       yy0 += j_rem;
383:       yy1 += j_rem;
384:       j -= j_rem;
385:       break;
386:     }
387:     while (j > 0) {
388:       x0 = x[0];
389:       x1 = x[1];
390:       x2 = x[2];
391:       x3 = x[3];
392:       x += 4;

394:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
395:       yy0 += 4;
396:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
397:       yy1 += 4;
398:       j -= 4;
399:     }
400:     z[0] = sum0;
401:     z[1] = sum1;

403:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
404:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
405:     break;
406:   case 1:
407:     PetscCall(VecGetArrayRead(yy[0], &yy0));
408:     switch (j_rem = j & 0x3) {
409:     case 3:
410:       x2 = x[2];
411:       sum0 += x2 * yy0[2]; /* fall through */
412:     case 2:
413:       x1 = x[1];
414:       sum0 += x1 * yy0[1]; /* fall through */
415:     case 1:
416:       x0 = x[0];
417:       sum0 += x0 * yy0[0]; /* fall through */
418:     case 0:
419:       x += j_rem;
420:       yy0 += j_rem;
421:       j -= j_rem;
422:       break;
423:     }
424:     while (j > 0) {
425:       sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
426:       yy0 += 4;
427:       j -= 4;
428:       x += 4;
429:     }
430:     z[0] = sum0;

432:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
433:     break;
434:   case 0:
435:     break;
436:   }
437:   z += nv_rem;
438:   i -= nv_rem;
439:   yy += nv_rem;

441:   while (i > 0) {
442:     sum0 = 0.;
443:     sum1 = 0.;
444:     sum2 = 0.;
445:     sum3 = 0.;
446:     PetscCall(VecGetArrayRead(yy[0], &yy0));
447:     PetscCall(VecGetArrayRead(yy[1], &yy1));
448:     PetscCall(VecGetArrayRead(yy[2], &yy2));
449:     PetscCall(VecGetArrayRead(yy[3], &yy3));
450:     x = xbase;

452:     j = n;
453:     switch (j_rem = j & 0x3) {
454:     case 3:
455:       x2 = x[2];
456:       sum0 += x2 * yy0[2];
457:       sum1 += x2 * yy1[2];
458:       sum2 += x2 * yy2[2];
459:       sum3 += x2 * yy3[2]; /* fall through */
460:     case 2:
461:       x1 = x[1];
462:       sum0 += x1 * yy0[1];
463:       sum1 += x1 * yy1[1];
464:       sum2 += x1 * yy2[1];
465:       sum3 += x1 * yy3[1]; /* fall through */
466:     case 1:
467:       x0 = x[0];
468:       sum0 += x0 * yy0[0];
469:       sum1 += x0 * yy1[0];
470:       sum2 += x0 * yy2[0];
471:       sum3 += x0 * yy3[0]; /* fall through */
472:     case 0:
473:       x += j_rem;
474:       yy0 += j_rem;
475:       yy1 += j_rem;
476:       yy2 += j_rem;
477:       yy3 += j_rem;
478:       j -= j_rem;
479:       break;
480:     }
481:     while (j > 0) {
482:       x0 = x[0];
483:       x1 = x[1];
484:       x2 = x[2];
485:       x3 = x[3];
486:       x += 4;

488:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
489:       yy0 += 4;
490:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
491:       yy1 += 4;
492:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
493:       yy2 += 4;
494:       sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
495:       yy3 += 4;
496:       j -= 4;
497:     }
498:     z[0] = sum0;
499:     z[1] = sum1;
500:     z[2] = sum2;
501:     z[3] = sum3;
502:     z += 4;
503:     i -= 4;
504:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
505:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
506:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
507:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
508:     yy += 4;
509:   }
510:   PetscCall(VecRestoreArrayRead(xin, &xbase));
511:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
512:   PetscFunctionReturn(PETSC_SUCCESS);
513: }

515: static PetscErrorCode VecMultiDot_Seq_GEMV(PetscBool conjugate, Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
516: {
517:   PetscInt           i, j, nfail;
518:   const PetscScalar *xarray, *yarray, *yfirst, *ynext;
519:   PetscBool          stop  = PETSC_FALSE;
520:   const char        *trans = conjugate ? "C" : "T";
521:   PetscInt64         lda   = 0;
522:   PetscBLASInt       n, m;

524:   PetscFunctionBegin;
525:   if (xin->map->n == 0) { // otherwise BLAS complains illegal values (0) for lda
526:     PetscCall(PetscArrayzero(z, nv));
527:     PetscFunctionReturn(PETSC_SUCCESS);
528:   }

530:   // caller guarantees xin->map->n <= PETSC_BLAS_INT_MAX
531:   PetscCall(PetscBLASIntCast(xin->map->n, &n));
532:   PetscCall(VecGetArrayRead(xin, &xarray));
533:   i = nfail = 0;
534:   while (i < nv) {
535:     stop = PETSC_FALSE;
536:     PetscCall(VecGetArrayRead(yin[i], &yfirst));
537:     yarray = yfirst;
538:     for (j = i + 1; j < nv; j++) {
539:       PetscCall(VecGetArrayRead(yin[j], &ynext));
540:       if (j == i + 1) {
541:         lda = ynext - yarray;
542:         if (lda < 0 || lda > PETSC_BLAS_INT_MAX || lda - n > 64) stop = PETSC_TRUE;
543:       } else if (lda * (j - i) != ynext - yarray) { // not in the same stride? if so, stop here
544:         stop = PETSC_TRUE;
545:       }
546:       PetscCall(VecRestoreArrayRead(yin[j], &ynext));
547:       if (stop) break;
548:     }
549:     PetscCall(VecRestoreArrayRead(yin[i], &yfirst));

551:     // we found m vectors yin[i..j)
552:     PetscCall(PetscBLASIntCast(j - i, &m));
553:     if (m > 1) {
554:       PetscBLASInt ione = 1, lda2 = (PetscBLASInt)lda; // the cast is safe since we've screened out those lda > PETSC_BLAS_INT_MAX above
555:       PetscScalar  one = 1, zero = 0;

557:       PetscCallBLAS("BLASgemv", BLASgemv_(trans, &n, &m, &one, yarray, &lda2, xarray, &ione, &zero, z + i, &ione));
558:       PetscCall(PetscLogFlops(PetscMax(m * (2.0 * n - 1), 0.0)));
559:     } else {
560:       if (nfail == 0) {
561:         if (conjugate) PetscCall(VecDot_Seq(xin, yin[i], z + i));
562:         else PetscCall(VecTDot_Seq(xin, yin[i], z + i));
563:         nfail++;
564:       } else break;
565:     }
566:     i = j;
567:   }
568:   PetscCall(VecRestoreArrayRead(xin, &xarray));
569:   if (i < nv) { // finish the remaining if any
570:     if (conjugate) PetscCall(VecMDot_Seq(xin, nv - i, yin + i, z + i));
571:     else PetscCall(VecMTDot_Seq(xin, nv - i, yin + i, z + i));
572:   }
573:   PetscFunctionReturn(PETSC_SUCCESS);
574: }

576: PetscErrorCode VecMDot_Seq_GEMV(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
577: {
578:   PetscFunctionBegin;
579:   if (xin->map->n > PETSC_BLAS_INT_MAX) PetscCall(VecMDot_Seq(xin, nv, yin, z));
580:   else PetscCall(VecMultiDot_Seq_GEMV(PETSC_TRUE, xin, nv, yin, z));
581:   PetscFunctionReturn(PETSC_SUCCESS);
582: }

584: PetscErrorCode VecMTDot_Seq_GEMV(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
585: {
586:   PetscFunctionBegin;
587:   if (xin->map->n > PETSC_BLAS_INT_MAX) PetscCall(VecMTDot_Seq(xin, nv, yin, z));
588:   else PetscCall(VecMultiDot_Seq_GEMV(PETSC_FALSE, xin, nv, yin, z));
589:   PetscFunctionReturn(PETSC_SUCCESS);
590: }

592: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
593: {
594:   const PetscInt n = xin->map->n;
595:   PetscInt       j = -1;

597:   PetscFunctionBegin;
598:   if (n) {
599:     const PetscScalar *xx;

601:     PetscCall(VecGetArrayRead(xin, &xx));
602:     minmax = PetscRealPart(xx[(j = 0)]);
603:     for (PetscInt i = 1; i < n; ++i) {
604:       const PetscReal tmp = PetscRealPart(xx[i]);

606:       if (cmp(tmp, minmax)) {
607:         j      = i;
608:         minmax = tmp;
609:       }
610:     }
611:     PetscCall(VecRestoreArrayRead(xin, &xx));
612:   }
613:   *z = minmax;
614:   if (idx) *idx = j;
615:   PetscFunctionReturn(PETSC_SUCCESS);
616: }

618: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
619: {
620:   return l > r;
621: }

623: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
624: {
625:   PetscFunctionBegin;
626:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
627:   PetscFunctionReturn(PETSC_SUCCESS);
628: }

630: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
631: {
632:   return l < r;
633: }

635: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
636: {
637:   PetscFunctionBegin;
638:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
639:   PetscFunctionReturn(PETSC_SUCCESS);
640: }

642: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
643: {
644:   const PetscInt n = xin->map->n;
645:   PetscScalar   *xx;

647:   PetscFunctionBegin;
648:   PetscCall(VecGetArrayWrite(xin, &xx));
649:   if (alpha == (PetscScalar)0.0) {
650:     PetscCall(PetscArrayzero(xx, n));
651:   } else {
652:     for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
653:   }
654:   PetscCall(VecRestoreArrayWrite(xin, &xx));
655:   PetscFunctionReturn(PETSC_SUCCESS);
656: }

658: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
659: {
660:   const PetscInt     j_rem = nv & 0x3, n = xin->map->n;
661:   const PetscScalar *yptr[4];
662:   PetscScalar       *xx;
663: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
664:   #pragma disjoint(*xx, **yptr, *aptr)
665: #endif

667:   PetscFunctionBegin;
668:   PetscCall(PetscLogFlops(nv * 2.0 * n));
669:   PetscCall(VecGetArray(xin, &xx));
670:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
671:   switch (j_rem) {
672:   case 3:
673:     PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
674:     break;
675:   case 2:
676:     PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
677:     break;
678:   case 1:
679:     PetscKernelAXPY(xx, alpha[0], yptr[0], n);
680:   default:
681:     break;
682:   }
683:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
684:   alpha += j_rem;
685:   y += j_rem;
686:   for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
687:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
688:     PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
689:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
690:   }
691:   PetscCall(VecRestoreArray(xin, &xx));
692:   PetscFunctionReturn(PETSC_SUCCESS);
693: }

695: /*  y = y + sum alpha[i] x[i] */
696: PetscErrorCode VecMAXPY_Seq_GEMV(Vec yin, PetscInt nv, const PetscScalar alpha[], Vec xin[])
697: {
698:   PetscInt           i, j, nfail;
699:   PetscScalar       *yarray;
700:   const PetscScalar *xfirst, *xnext, *xarray;
701:   PetscBool          stop = PETSC_FALSE;
702:   PetscInt64         lda  = 0;
703:   PetscBLASInt       n, m;

705:   PetscFunctionBegin;
706:   if (yin->map->n == 0 || yin->map->n > PETSC_BLAS_INT_MAX) {
707:     PetscCall(VecMAXPY_Seq(yin, nv, alpha, xin));
708:     PetscFunctionReturn(PETSC_SUCCESS);
709:   }

711:   PetscCall(PetscBLASIntCast(yin->map->n, &n));
712:   PetscCall(VecGetArray(yin, &yarray));
713:   i = nfail = 0;
714:   while (i < nv) {
715:     stop = PETSC_FALSE;
716:     PetscCall(VecGetArrayRead(xin[i], &xfirst));
717:     xarray = xfirst;
718:     for (j = i + 1; j < nv; j++) {
719:       PetscCall(VecGetArrayRead(xin[j], &xnext));
720:       if (j == i + 1) {
721:         lda = xnext - xarray;
722:         if (lda < 0 || lda > PETSC_BLAS_INT_MAX || lda - n > 64) stop = PETSC_TRUE;
723:       } else if (lda * (j - i) != xnext - xarray) { // not in the same stride? if so, stop here
724:         stop = PETSC_TRUE;
725:       }
726:       PetscCall(VecRestoreArrayRead(xin[j], &xnext));
727:       if (stop) break;
728:     }
729:     PetscCall(VecRestoreArrayRead(xin[i], &xfirst));

731:     PetscCall(PetscBLASIntCast(j - i, &m));
732:     if (m > 1) {
733:       PetscBLASInt incx = 1, incy = 1, lda2 = (PetscBLASInt)lda; // the cast is safe since we've screened out those lda > PETSC_BLAS_INT_MAX above
734:       PetscScalar  one = 1;
735:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &n, &m, &one, xarray, &lda2, alpha + i, &incx, &one, yarray, &incy));
736:       PetscCall(PetscLogFlops(m * 2.0 * n));
737:     } else {
738:       // we only allow falling back on VecAXPY once
739:       if (nfail++ == 0) PetscCall(VecAXPY_Seq(yin, alpha[i], xin[i]));
740:       else break; // break the while loop
741:     }
742:     i = j;
743:   }
744:   PetscCall(VecRestoreArray(yin, &yarray));

746:   // finish the remaining if any
747:   if (i < nv) PetscCall(VecMAXPY_Seq(yin, nv - i, alpha + i, xin + i));
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }

751: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

753: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
754: {
755:   PetscFunctionBegin;
756:   if (alpha == (PetscScalar)0.0) {
757:     PetscCall(VecCopy(xin, yin));
758:   } else if (alpha == (PetscScalar)1.0) {
759:     PetscCall(VecAXPY_Seq(yin, alpha, xin));
760:   } else {
761:     const PetscInt     n = yin->map->n;
762:     const PetscScalar *xx;
763:     PetscScalar       *yy;

765:     PetscCall(VecGetArrayRead(xin, &xx));
766:     PetscCall(VecGetArray(yin, &yy));
767:     if (alpha == (PetscScalar)-1.0) {
768:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
769:       PetscCall(PetscLogFlops(n));
770:     } else {
771: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
772:       fortranaypx_(&n, &alpha, xx, yy);
773: #else
774:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
775: #endif
776:       PetscCall(PetscLogFlops(2 * n));
777:     }
778:     PetscCall(VecRestoreArrayRead(xin, &xx));
779:     PetscCall(VecRestoreArray(yin, &yy));
780:   }
781:   PetscFunctionReturn(PETSC_SUCCESS);
782: }

784: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
785: /*
786:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
787:    to be slower than a regular C loop.  Hence,we do not include it.
788:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
789: */

791: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
792: {
793:   const PetscInt     n = win->map->n;
794:   const PetscScalar *yy, *xx;
795:   PetscScalar       *ww;

797:   PetscFunctionBegin;
798:   PetscCall(VecGetArrayRead(xin, &xx));
799:   PetscCall(VecGetArrayRead(yin, &yy));
800:   PetscCall(VecGetArray(win, &ww));
801:   if (alpha == (PetscScalar)1.0) {
802:     PetscCall(PetscLogFlops(n));
803:     /* could call BLAS axpy after call to memcopy, but may be slower */
804:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
805:   } else if (alpha == (PetscScalar)-1.0) {
806:     PetscCall(PetscLogFlops(n));
807:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
808:   } else if (alpha == (PetscScalar)0.0) {
809:     PetscCall(PetscArraycpy(ww, yy, n));
810:   } else {
811:     PetscCall(PetscLogFlops(2.0 * n));
812: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
813:     fortranwaxpy_(&n, &alpha, xx, yy, ww);
814: #else
815:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
816: #endif
817:   }
818:   PetscCall(VecRestoreArrayRead(xin, &xx));
819:   PetscCall(VecRestoreArrayRead(yin, &yy));
820:   PetscCall(VecRestoreArray(win, &ww));
821:   PetscFunctionReturn(PETSC_SUCCESS);
822: }

824: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
825: {
826:   const PetscInt     n = xin->map->n;
827:   const PetscScalar *xx, *yy;
828:   PetscReal          m = 0.0;

830:   PetscFunctionBegin;
831:   PetscCall(VecGetArrayRead(xin, &xx));
832:   PetscCall(VecGetArrayRead(yin, &yy));
833:   for (PetscInt i = 0; i < n; ++i) {
834:     const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);

836:     // use a separate value to not re-evaluate side-effects
837:     m = PetscMax(v, m);
838:   }
839:   PetscCall(VecRestoreArrayRead(xin, &xx));
840:   PetscCall(VecRestoreArrayRead(yin, &yy));
841:   PetscCall(PetscLogFlops(n));
842:   *max = m;
843:   PetscFunctionReturn(PETSC_SUCCESS);
844: }

846: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
847: {
848:   Vec_Seq *v = (Vec_Seq *)vin->data;

850:   PetscFunctionBegin;
851:   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
852:   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
853:   v->array         = (PetscScalar *)a;
854:   PetscFunctionReturn(PETSC_SUCCESS);
855: }

857: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
858: {
859:   Vec_Seq *v = (Vec_Seq *)vin->data;

861:   PetscFunctionBegin;
862:   PetscCall(PetscFree(v->array_allocated));
863:   v->array_allocated = v->array = (PetscScalar *)a;
864:   PetscFunctionReturn(PETSC_SUCCESS);
865: }