Actual source code: dvec2.c

  1: /*
  2:    Defines some vector operation functions that are shared by
  3:    sequential and parallel vectors.
  4: */
  5: #include <../src/vec/vec/impls/dvecimpl.h>
  6: #include <petsc/private/kernels/petscaxpy.h>

  8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  9: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 10: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 11: {
 12:   const PetscInt     n = xin->map->n;
 13:   PetscInt           i = nv, nv_rem = nv & 0x3;
 14:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3;
 15:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
 16:   Vec               *yy = (Vec *)yin;

 18:   PetscFunctionBegin;
 19:   PetscCall(VecGetArrayRead(xin, &x));
 20:   switch (nv_rem) {
 21:   case 3:
 22:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 23:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 24:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 25:     fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
 26:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 27:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 28:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 29:     z[0] = sum0;
 30:     z[1] = sum1;
 31:     z[2] = sum2;
 32:     break;
 33:   case 2:
 34:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 35:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 36:     fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
 37:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 38:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 39:     z[0] = sum0;
 40:     z[1] = sum1;
 41:     break;
 42:   case 1:
 43:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 44:     fortranmdot1_(x, yy0, &n, &sum0);
 45:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 46:     z[0] = sum0;
 47:     break;
 48:   case 0:
 49:     break;
 50:   }
 51:   z += nv_rem;
 52:   i -= nv_rem;
 53:   yy += nv_rem;

 55:   while (i > 0) {
 56:     sum0 = 0.;
 57:     sum1 = 0.;
 58:     sum2 = 0.;
 59:     sum3 = 0.;
 60:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 61:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 62:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 63:     PetscCall(VecGetArrayRead(yy[3], &yy3));
 64:     fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
 65:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 66:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 67:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 68:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
 69:     yy += 4;
 70:     z[0] = sum0;
 71:     z[1] = sum1;
 72:     z[2] = sum2;
 73:     z[3] = sum3;
 74:     z += 4;
 75:     i -= 4;
 76:   }
 77:   PetscCall(VecRestoreArrayRead(xin, &x));
 78:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: #else
 83: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 84: {
 85:   const PetscInt     n = xin->map->n;
 86:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
 87:   PetscScalar        sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
 88:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
 89:   const Vec         *yy = (Vec *)yin;

 91:   PetscFunctionBegin;
 92:   if (n == 0) {
 93:     PetscCall(PetscArrayzero(z, nv));
 94:     PetscFunctionReturn(PETSC_SUCCESS);
 95:   }
 96:   PetscCall(VecGetArrayRead(xin, &xbase));
 97:   x = xbase;
 98:   switch (nv_rem) {
 99:   case 3:
100:     PetscCall(VecGetArrayRead(yy[0], &yy0));
101:     PetscCall(VecGetArrayRead(yy[1], &yy1));
102:     PetscCall(VecGetArrayRead(yy[2], &yy2));
103:     switch (j_rem = j & 0x3) {
104:     case 3:
105:       x2 = x[2];
106:       sum0 += x2 * PetscConj(yy0[2]);
107:       sum1 += x2 * PetscConj(yy1[2]);
108:       sum2 += x2 * PetscConj(yy2[2]); /* fall through */
109:     case 2:
110:       x1 = x[1];
111:       sum0 += x1 * PetscConj(yy0[1]);
112:       sum1 += x1 * PetscConj(yy1[1]);
113:       sum2 += x1 * PetscConj(yy2[1]); /* fall through */
114:     case 1:
115:       x0 = x[0];
116:       sum0 += x0 * PetscConj(yy0[0]);
117:       sum1 += x0 * PetscConj(yy1[0]);
118:       sum2 += x0 * PetscConj(yy2[0]); /* fall through */
119:     case 0:
120:       x += j_rem;
121:       yy0 += j_rem;
122:       yy1 += j_rem;
123:       yy2 += j_rem;
124:       j -= j_rem;
125:       break;
126:     }
127:     while (j > 0) {
128:       x0 = x[0];
129:       x1 = x[1];
130:       x2 = x[2];
131:       x3 = x[3];
132:       x += 4;

134:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
135:       yy0 += 4;
136:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
137:       yy1 += 4;
138:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
139:       yy2 += 4;
140:       j -= 4;
141:     }
142:     z[0] = sum0;
143:     z[1] = sum1;
144:     z[2] = sum2;
145:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
146:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
147:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
148:     break;
149:   case 2:
150:     PetscCall(VecGetArrayRead(yy[0], &yy0));
151:     PetscCall(VecGetArrayRead(yy[1], &yy1));
152:     switch (j_rem = j & 0x3) {
153:     case 3:
154:       x2 = x[2];
155:       sum0 += x2 * PetscConj(yy0[2]);
156:       sum1 += x2 * PetscConj(yy1[2]); /* fall through */
157:     case 2:
158:       x1 = x[1];
159:       sum0 += x1 * PetscConj(yy0[1]);
160:       sum1 += x1 * PetscConj(yy1[1]); /* fall through */
161:     case 1:
162:       x0 = x[0];
163:       sum0 += x0 * PetscConj(yy0[0]);
164:       sum1 += x0 * PetscConj(yy1[0]); /* fall through */
165:     case 0:
166:       x += j_rem;
167:       yy0 += j_rem;
168:       yy1 += j_rem;
169:       j -= j_rem;
170:       break;
171:     }
172:     while (j > 0) {
173:       x0 = x[0];
174:       x1 = x[1];
175:       x2 = x[2];
176:       x3 = x[3];
177:       x += 4;

179:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
180:       yy0 += 4;
181:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
182:       yy1 += 4;
183:       j -= 4;
184:     }
185:     z[0] = sum0;
186:     z[1] = sum1;

188:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
189:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
190:     break;
191:   case 1:
192:     PetscCall(VecGetArrayRead(yy[0], &yy0));
193:     switch (j_rem = j & 0x3) {
194:     case 3:
195:       x2 = x[2];
196:       sum0 += x2 * PetscConj(yy0[2]); /* fall through */
197:     case 2:
198:       x1 = x[1];
199:       sum0 += x1 * PetscConj(yy0[1]); /* fall through */
200:     case 1:
201:       x0 = x[0];
202:       sum0 += x0 * PetscConj(yy0[0]); /* fall through */
203:     case 0:
204:       x += j_rem;
205:       yy0 += j_rem;
206:       j -= j_rem;
207:       break;
208:     }
209:     while (j > 0) {
210:       sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
211:       yy0 += 4;
212:       j -= 4;
213:       x += 4;
214:     }
215:     z[0] = sum0;

217:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
218:     break;
219:   case 0:
220:     break;
221:   }
222:   z += nv_rem;
223:   i -= nv_rem;
224:   yy += nv_rem;

226:   while (i > 0) {
227:     sum0 = 0.;
228:     sum1 = 0.;
229:     sum2 = 0.;
230:     sum3 = 0.;
231:     PetscCall(VecGetArrayRead(yy[0], &yy0));
232:     PetscCall(VecGetArrayRead(yy[1], &yy1));
233:     PetscCall(VecGetArrayRead(yy[2], &yy2));
234:     PetscCall(VecGetArrayRead(yy[3], &yy3));

236:     j = n;
237:     x = xbase;
238:     switch (j_rem = j & 0x3) {
239:     case 3:
240:       x2 = x[2];
241:       sum0 += x2 * PetscConj(yy0[2]);
242:       sum1 += x2 * PetscConj(yy1[2]);
243:       sum2 += x2 * PetscConj(yy2[2]);
244:       sum3 += x2 * PetscConj(yy3[2]); /* fall through */
245:     case 2:
246:       x1 = x[1];
247:       sum0 += x1 * PetscConj(yy0[1]);
248:       sum1 += x1 * PetscConj(yy1[1]);
249:       sum2 += x1 * PetscConj(yy2[1]);
250:       sum3 += x1 * PetscConj(yy3[1]); /* fall through */
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0 * PetscConj(yy0[0]);
254:       sum1 += x0 * PetscConj(yy1[0]);
255:       sum2 += x0 * PetscConj(yy2[0]);
256:       sum3 += x0 * PetscConj(yy3[0]); /* fall through */
257:     case 0:
258:       x += j_rem;
259:       yy0 += j_rem;
260:       yy1 += j_rem;
261:       yy2 += j_rem;
262:       yy3 += j_rem;
263:       j -= j_rem;
264:       break;
265:     }
266:     while (j > 0) {
267:       x0 = x[0];
268:       x1 = x[1];
269:       x2 = x[2];
270:       x3 = x[3];
271:       x += 4;

273:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
274:       yy0 += 4;
275:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
276:       yy1 += 4;
277:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
278:       yy2 += 4;
279:       sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
280:       yy3 += 4;
281:       j -= 4;
282:     }
283:     z[0] = sum0;
284:     z[1] = sum1;
285:     z[2] = sum2;
286:     z[3] = sum3;
287:     z += 4;
288:     i -= 4;
289:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
290:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
291:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
292:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
293:     yy += 4;
294:   }
295:   PetscCall(VecRestoreArrayRead(xin, &xbase));
296:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }
299: #endif

301: /* ----------------------------------------------------------------------------*/
302: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
303: {
304:   const PetscInt     n = xin->map->n;
305:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
306:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
307:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
308:   const Vec         *yy = (Vec *)yin;

310:   PetscFunctionBegin;
311:   PetscCall(VecGetArrayRead(xin, &xbase));
312:   x = xbase;

314:   switch (nv_rem) {
315:   case 3:
316:     PetscCall(VecGetArrayRead(yy[0], &yy0));
317:     PetscCall(VecGetArrayRead(yy[1], &yy1));
318:     PetscCall(VecGetArrayRead(yy[2], &yy2));
319:     switch (j_rem = j & 0x3) {
320:     case 3:
321:       x2 = x[2];
322:       sum0 += x2 * yy0[2];
323:       sum1 += x2 * yy1[2];
324:       sum2 += x2 * yy2[2]; /* fall through */
325:     case 2:
326:       x1 = x[1];
327:       sum0 += x1 * yy0[1];
328:       sum1 += x1 * yy1[1];
329:       sum2 += x1 * yy2[1]; /* fall through */
330:     case 1:
331:       x0 = x[0];
332:       sum0 += x0 * yy0[0];
333:       sum1 += x0 * yy1[0];
334:       sum2 += x0 * yy2[0]; /* fall through */
335:     case 0:
336:       x += j_rem;
337:       yy0 += j_rem;
338:       yy1 += j_rem;
339:       yy2 += j_rem;
340:       j -= j_rem;
341:       break;
342:     }
343:     while (j > 0) {
344:       x0 = x[0];
345:       x1 = x[1];
346:       x2 = x[2];
347:       x3 = x[3];
348:       x += 4;

350:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
351:       yy0 += 4;
352:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
353:       yy1 += 4;
354:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
355:       yy2 += 4;
356:       j -= 4;
357:     }
358:     z[0] = sum0;
359:     z[1] = sum1;
360:     z[2] = sum2;
361:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
362:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
363:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
364:     break;
365:   case 2:
366:     PetscCall(VecGetArrayRead(yy[0], &yy0));
367:     PetscCall(VecGetArrayRead(yy[1], &yy1));
368:     switch (j_rem = j & 0x3) {
369:     case 3:
370:       x2 = x[2];
371:       sum0 += x2 * yy0[2];
372:       sum1 += x2 * yy1[2]; /* fall through */
373:     case 2:
374:       x1 = x[1];
375:       sum0 += x1 * yy0[1];
376:       sum1 += x1 * yy1[1]; /* fall through */
377:     case 1:
378:       x0 = x[0];
379:       sum0 += x0 * yy0[0];
380:       sum1 += x0 * yy1[0]; /* fall through */
381:     case 0:
382:       x += j_rem;
383:       yy0 += j_rem;
384:       yy1 += j_rem;
385:       j -= j_rem;
386:       break;
387:     }
388:     while (j > 0) {
389:       x0 = x[0];
390:       x1 = x[1];
391:       x2 = x[2];
392:       x3 = x[3];
393:       x += 4;

395:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
396:       yy0 += 4;
397:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
398:       yy1 += 4;
399:       j -= 4;
400:     }
401:     z[0] = sum0;
402:     z[1] = sum1;

404:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
405:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
406:     break;
407:   case 1:
408:     PetscCall(VecGetArrayRead(yy[0], &yy0));
409:     switch (j_rem = j & 0x3) {
410:     case 3:
411:       x2 = x[2];
412:       sum0 += x2 * yy0[2]; /* fall through */
413:     case 2:
414:       x1 = x[1];
415:       sum0 += x1 * yy0[1]; /* fall through */
416:     case 1:
417:       x0 = x[0];
418:       sum0 += x0 * yy0[0]; /* fall through */
419:     case 0:
420:       x += j_rem;
421:       yy0 += j_rem;
422:       j -= j_rem;
423:       break;
424:     }
425:     while (j > 0) {
426:       sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
427:       yy0 += 4;
428:       j -= 4;
429:       x += 4;
430:     }
431:     z[0] = sum0;

433:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
434:     break;
435:   case 0:
436:     break;
437:   }
438:   z += nv_rem;
439:   i -= nv_rem;
440:   yy += nv_rem;

442:   while (i > 0) {
443:     sum0 = 0.;
444:     sum1 = 0.;
445:     sum2 = 0.;
446:     sum3 = 0.;
447:     PetscCall(VecGetArrayRead(yy[0], &yy0));
448:     PetscCall(VecGetArrayRead(yy[1], &yy1));
449:     PetscCall(VecGetArrayRead(yy[2], &yy2));
450:     PetscCall(VecGetArrayRead(yy[3], &yy3));
451:     x = xbase;

453:     j = n;
454:     switch (j_rem = j & 0x3) {
455:     case 3:
456:       x2 = x[2];
457:       sum0 += x2 * yy0[2];
458:       sum1 += x2 * yy1[2];
459:       sum2 += x2 * yy2[2];
460:       sum3 += x2 * yy3[2]; /* fall through */
461:     case 2:
462:       x1 = x[1];
463:       sum0 += x1 * yy0[1];
464:       sum1 += x1 * yy1[1];
465:       sum2 += x1 * yy2[1];
466:       sum3 += x1 * yy3[1]; /* fall through */
467:     case 1:
468:       x0 = x[0];
469:       sum0 += x0 * yy0[0];
470:       sum1 += x0 * yy1[0];
471:       sum2 += x0 * yy2[0];
472:       sum3 += x0 * yy3[0]; /* fall through */
473:     case 0:
474:       x += j_rem;
475:       yy0 += j_rem;
476:       yy1 += j_rem;
477:       yy2 += j_rem;
478:       yy3 += j_rem;
479:       j -= j_rem;
480:       break;
481:     }
482:     while (j > 0) {
483:       x0 = x[0];
484:       x1 = x[1];
485:       x2 = x[2];
486:       x3 = x[3];
487:       x += 4;

489:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
490:       yy0 += 4;
491:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
492:       yy1 += 4;
493:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
494:       yy2 += 4;
495:       sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
496:       yy3 += 4;
497:       j -= 4;
498:     }
499:     z[0] = sum0;
500:     z[1] = sum1;
501:     z[2] = sum2;
502:     z[3] = sum3;
503:     z += 4;
504:     i -= 4;
505:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
506:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
507:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
508:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
509:     yy += 4;
510:   }
511:   PetscCall(VecRestoreArrayRead(xin, &xbase));
512:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode VecMultiDot_Seq_GEMV(PetscBool conjugate, Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
517: {
518:   PetscInt           i, j, nfail;
519:   const PetscScalar *xarray, *yarray, *yfirst, *ynext;
520:   PetscBool          stop  = PETSC_FALSE;
521:   const char        *trans = conjugate ? "C" : "T";
522:   PetscInt64         lda   = 0;
523:   PetscBLASInt       n, m;

525:   PetscFunctionBegin;
526:   if (xin->map->n == 0) { // otherwise BLAS complains illegal values (0) for lda
527:     PetscCall(PetscArrayzero(z, nv));
528:     PetscFunctionReturn(PETSC_SUCCESS);
529:   }

531:   // caller guarantees xin->map->n <= PETSC_BLAS_INT_MAX
532:   PetscCall(PetscBLASIntCast(xin->map->n, &n));
533:   PetscCall(VecGetArrayRead(xin, &xarray));
534:   i = nfail = 0;
535:   while (i < nv) {
536:     stop = PETSC_FALSE;
537:     PetscCall(VecGetArrayRead(yin[i], &yfirst));
538:     yarray = yfirst;
539:     for (j = i + 1; j < nv; j++) {
540:       PetscCall(VecGetArrayRead(yin[j], &ynext));
541:       if (j == i + 1) {
542:         lda = ynext - yarray;
543:         if (lda < 0 || lda > PETSC_BLAS_INT_MAX || lda - n > 64) stop = PETSC_TRUE;
544:       } else if (lda * (j - i) != ynext - yarray) { // not in the same stride? if so, stop here
545:         stop = PETSC_TRUE;
546:       }
547:       PetscCall(VecRestoreArrayRead(yin[j], &ynext));
548:       if (stop) break;
549:     }
550:     PetscCall(VecRestoreArrayRead(yin[i], &yfirst));

552:     // we found m vectors yin[i..j)
553:     m = j - i;
554:     if (m > 1) {
555:       PetscBLASInt ione = 1, lda2 = (PetscBLASInt)lda; // the cast is safe since we've screened out those lda > PETSC_BLAS_INT_MAX above
556:       PetscScalar  one = 1, zero = 0;

558:       PetscCallBLAS("BLASgemv", BLASgemv_(trans, &n, &m, &one, yarray, &lda2, xarray, &ione, &zero, z + i, &ione));
559:       PetscCall(PetscLogFlops(PetscMax(m * (2.0 * n - 1), 0.0)));
560:     } else {
561:       if (nfail == 0) {
562:         if (conjugate) PetscCall(VecDot_Seq(xin, yin[i], z + i));
563:         else PetscCall(VecTDot_Seq(xin, yin[i], z + i));
564:         nfail++;
565:       } else break;
566:     }
567:     i = j;
568:   }
569:   PetscCall(VecRestoreArrayRead(xin, &xarray));
570:   if (i < nv) { // finish the remaining if any
571:     if (conjugate) PetscCall(VecMDot_Seq(xin, nv - i, yin + i, z + i));
572:     else PetscCall(VecMTDot_Seq(xin, nv - i, yin + i, z + i));
573:   }
574:   PetscFunctionReturn(PETSC_SUCCESS);
575: }

577: PetscErrorCode VecMDot_Seq_GEMV(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
578: {
579:   PetscFunctionBegin;
580:   if (xin->map->n > PETSC_BLAS_INT_MAX) {
581:     PetscCall(VecMDot_Seq(xin, nv, yin, z));
582:   } else {
583:     PetscCall(VecMultiDot_Seq_GEMV(PETSC_TRUE, xin, nv, yin, z));
584:   }
585:   PetscFunctionReturn(PETSC_SUCCESS);
586: }

588: PetscErrorCode VecMTDot_Seq_GEMV(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
589: {
590:   PetscFunctionBegin;
591:   if (xin->map->n > PETSC_BLAS_INT_MAX) {
592:     PetscCall(VecMTDot_Seq(xin, nv, yin, z));
593:   } else {
594:     PetscCall(VecMultiDot_Seq_GEMV(PETSC_FALSE, xin, nv, yin, z));
595:   }
596:   PetscFunctionReturn(PETSC_SUCCESS);
597: }

599: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
600: {
601:   const PetscInt n = xin->map->n;
602:   PetscInt       j = -1;

604:   PetscFunctionBegin;
605:   if (n) {
606:     const PetscScalar *xx;

608:     PetscCall(VecGetArrayRead(xin, &xx));
609:     minmax = PetscRealPart(xx[(j = 0)]);
610:     for (PetscInt i = 1; i < n; ++i) {
611:       const PetscReal tmp = PetscRealPart(xx[i]);

613:       if (cmp(tmp, minmax)) {
614:         j      = i;
615:         minmax = tmp;
616:       }
617:     }
618:     PetscCall(VecRestoreArrayRead(xin, &xx));
619:   }
620:   *z = minmax;
621:   if (idx) *idx = j;
622:   PetscFunctionReturn(PETSC_SUCCESS);
623: }

625: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
626: {
627:   return l > r;
628: }

630: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
631: {
632:   PetscFunctionBegin;
633:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
634:   PetscFunctionReturn(PETSC_SUCCESS);
635: }

637: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
638: {
639:   return l < r;
640: }

642: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
643: {
644:   PetscFunctionBegin;
645:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
646:   PetscFunctionReturn(PETSC_SUCCESS);
647: }

649: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
650: {
651:   const PetscInt n = xin->map->n;
652:   PetscScalar   *xx;

654:   PetscFunctionBegin;
655:   PetscCall(VecGetArrayWrite(xin, &xx));
656:   if (alpha == (PetscScalar)0.0) {
657:     PetscCall(PetscArrayzero(xx, n));
658:   } else {
659:     for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
660:   }
661:   PetscCall(VecRestoreArrayWrite(xin, &xx));
662:   PetscFunctionReturn(PETSC_SUCCESS);
663: }

665: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
666: {
667:   const PetscInt     j_rem = nv & 0x3, n = xin->map->n;
668:   const PetscScalar *yptr[4];
669:   PetscScalar       *xx;
670: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
671:   #pragma disjoint(*xx, **yptr, *aptr)
672: #endif

674:   PetscFunctionBegin;
675:   PetscCall(PetscLogFlops(nv * 2.0 * n));
676:   PetscCall(VecGetArray(xin, &xx));
677:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
678:   switch (j_rem) {
679:   case 3:
680:     PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
681:     break;
682:   case 2:
683:     PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
684:     break;
685:   case 1:
686:     PetscKernelAXPY(xx, alpha[0], yptr[0], n);
687:   default:
688:     break;
689:   }
690:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
691:   alpha += j_rem;
692:   y += j_rem;
693:   for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
694:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
695:     PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
696:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
697:   }
698:   PetscCall(VecRestoreArray(xin, &xx));
699:   PetscFunctionReturn(PETSC_SUCCESS);
700: }

702: /*  y = y + sum alpha[i] x[i] */
703: PetscErrorCode VecMAXPY_Seq_GEMV(Vec yin, PetscInt nv, const PetscScalar alpha[], Vec xin[])
704: {
705:   PetscInt           i, j, nfail;
706:   PetscScalar       *yarray;
707:   const PetscScalar *xfirst, *xnext, *xarray;
708:   PetscBool          stop = PETSC_FALSE;
709:   PetscInt64         lda  = 0;
710:   PetscBLASInt       n, m;

712:   PetscFunctionBegin;
713:   if (yin->map->n == 0 || yin->map->n > PETSC_BLAS_INT_MAX) {
714:     PetscCall(VecMAXPY_Seq(yin, nv, alpha, xin));
715:     PetscFunctionReturn(PETSC_SUCCESS);
716:   }

718:   PetscCall(PetscBLASIntCast(yin->map->n, &n));
719:   PetscCall(VecGetArray(yin, &yarray));
720:   i = nfail = 0;
721:   while (i < nv) {
722:     stop = PETSC_FALSE;
723:     PetscCall(VecGetArrayRead(xin[i], &xfirst));
724:     xarray = xfirst;
725:     for (j = i + 1; j < nv; j++) {
726:       PetscCall(VecGetArrayRead(xin[j], &xnext));
727:       if (j == i + 1) {
728:         lda = xnext - xarray;
729:         if (lda < 0 || lda > PETSC_BLAS_INT_MAX || lda - n > 64) stop = PETSC_TRUE;
730:       } else if (lda * (j - i) != xnext - xarray) { // not in the same stride? if so, stop here
731:         stop = PETSC_TRUE;
732:       }
733:       PetscCall(VecRestoreArrayRead(xin[j], &xnext));
734:       if (stop) break;
735:     }
736:     PetscCall(VecRestoreArrayRead(xin[i], &xfirst));

738:     m = j - i;
739:     if (m > 1) {
740:       PetscBLASInt incx = 1, incy = 1, lda2 = (PetscBLASInt)lda; // the cast is safe since we've screened out those lda > PETSC_BLAS_INT_MAX above
741:       PetscScalar  one = 1;
742:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &n, &m, &one, xarray, &lda2, alpha + i, &incx, &one, yarray, &incy));
743:       PetscCall(PetscLogFlops(m * 2.0 * n));
744:     } else {
745:       // we only allow falling back on VecAXPY once
746:       if (nfail++ == 0) PetscCall(VecAXPY_Seq(yin, alpha[i], xin[i]));
747:       else break; // break the while loop
748:     }
749:     i = j;
750:   }
751:   PetscCall(VecRestoreArray(yin, &yarray));

753:   // finish the remaining if any
754:   if (i < nv) PetscCall(VecMAXPY_Seq(yin, nv - i, alpha + i, xin + i));
755:   PetscFunctionReturn(PETSC_SUCCESS);
756: }

758: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

760: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
761: {
762:   PetscFunctionBegin;
763:   if (alpha == (PetscScalar)0.0) {
764:     PetscCall(VecCopy(xin, yin));
765:   } else if (alpha == (PetscScalar)1.0) {
766:     PetscCall(VecAXPY_Seq(yin, alpha, xin));
767:   } else {
768:     const PetscInt     n = yin->map->n;
769:     const PetscScalar *xx;
770:     PetscScalar       *yy;

772:     PetscCall(VecGetArrayRead(xin, &xx));
773:     PetscCall(VecGetArray(yin, &yy));
774:     if (alpha == (PetscScalar)-1.0) {
775:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
776:       PetscCall(PetscLogFlops(n));
777:     } else {
778: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
779:       fortranaypx_(&n, &alpha, xx, yy);
780: #else
781:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
782: #endif
783:       PetscCall(PetscLogFlops(2 * n));
784:     }
785:     PetscCall(VecRestoreArrayRead(xin, &xx));
786:     PetscCall(VecRestoreArray(yin, &yy));
787:   }
788:   PetscFunctionReturn(PETSC_SUCCESS);
789: }

791: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
792: /*
793:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
794:    to be slower than a regular C loop.  Hence,we do not include it.
795:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
796: */

798: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
799: {
800:   const PetscInt     n = win->map->n;
801:   const PetscScalar *yy, *xx;
802:   PetscScalar       *ww;

804:   PetscFunctionBegin;
805:   PetscCall(VecGetArrayRead(xin, &xx));
806:   PetscCall(VecGetArrayRead(yin, &yy));
807:   PetscCall(VecGetArray(win, &ww));
808:   if (alpha == (PetscScalar)1.0) {
809:     PetscCall(PetscLogFlops(n));
810:     /* could call BLAS axpy after call to memcopy, but may be slower */
811:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
812:   } else if (alpha == (PetscScalar)-1.0) {
813:     PetscCall(PetscLogFlops(n));
814:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
815:   } else if (alpha == (PetscScalar)0.0) {
816:     PetscCall(PetscArraycpy(ww, yy, n));
817:   } else {
818:     PetscCall(PetscLogFlops(2.0 * n));
819: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
820:     fortranwaxpy_(&n, &alpha, xx, yy, ww);
821: #else
822:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
823: #endif
824:   }
825:   PetscCall(VecRestoreArrayRead(xin, &xx));
826:   PetscCall(VecRestoreArrayRead(yin, &yy));
827:   PetscCall(VecRestoreArray(win, &ww));
828:   PetscFunctionReturn(PETSC_SUCCESS);
829: }

831: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
832: {
833:   const PetscInt     n = xin->map->n;
834:   const PetscScalar *xx, *yy;
835:   PetscReal          m = 0.0;

837:   PetscFunctionBegin;
838:   PetscCall(VecGetArrayRead(xin, &xx));
839:   PetscCall(VecGetArrayRead(yin, &yy));
840:   for (PetscInt i = 0; i < n; ++i) {
841:     const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);

843:     // use a separate value to not re-evaluate side-effects
844:     m = PetscMax(v, m);
845:   }
846:   PetscCall(VecRestoreArrayRead(xin, &xx));
847:   PetscCall(VecRestoreArrayRead(yin, &yy));
848:   PetscCall(PetscLogFlops(n));
849:   *max = m;
850:   PetscFunctionReturn(PETSC_SUCCESS);
851: }

853: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
854: {
855:   Vec_Seq *v = (Vec_Seq *)vin->data;

857:   PetscFunctionBegin;
858:   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
859:   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
860:   v->array         = (PetscScalar *)a;
861:   PetscFunctionReturn(PETSC_SUCCESS);
862: }

864: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
865: {
866:   Vec_Seq *v = (Vec_Seq *)vin->data;

868:   PetscFunctionBegin;
869:   PetscCall(PetscFree(v->array_allocated));
870:   v->array_allocated = v->array = (PetscScalar *)a;
871:   PetscFunctionReturn(PETSC_SUCCESS);
872: }