Actual source code: dvec2.c


  2: /*
  3:    Defines some vector operation functions that are shared by
  4:    sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc/private/kernels/petscaxpy.h>

  9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 11: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 12: {
 13:   PetscInt          i,nv_rem,n = xin->map->n;
 14:   PetscScalar       sum0,sum1,sum2,sum3;
 15:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 16:   Vec               *yy;

 18:   sum0 = 0.0;
 19:   sum1 = 0.0;
 20:   sum2 = 0.0;

 22:   i      = nv;
 23:   nv_rem = nv&0x3;
 24:   yy     = (Vec*)yin;
 25:   VecGetArrayRead(xin,&x);

 27:   switch (nv_rem) {
 28:   case 3:
 29:     VecGetArrayRead(yy[0],&yy0);
 30:     VecGetArrayRead(yy[1],&yy1);
 31:     VecGetArrayRead(yy[2],&yy2);
 32:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 33:     VecRestoreArrayRead(yy[0],&yy0);
 34:     VecRestoreArrayRead(yy[1],&yy1);
 35:     VecRestoreArrayRead(yy[2],&yy2);
 36:     z[0] = sum0;
 37:     z[1] = sum1;
 38:     z[2] = sum2;
 39:     break;
 40:   case 2:
 41:     VecGetArrayRead(yy[0],&yy0);
 42:     VecGetArrayRead(yy[1],&yy1);
 43:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 44:     VecRestoreArrayRead(yy[0],&yy0);
 45:     VecRestoreArrayRead(yy[1],&yy1);
 46:     z[0] = sum0;
 47:     z[1] = sum1;
 48:     break;
 49:   case 1:
 50:     VecGetArrayRead(yy[0],&yy0);
 51:     fortranmdot1_(x,yy0,&n,&sum0);
 52:     VecRestoreArrayRead(yy[0],&yy0);
 53:     z[0] = sum0;
 54:     break;
 55:   case 0:
 56:     break;
 57:   }
 58:   z  += nv_rem;
 59:   i  -= nv_rem;
 60:   yy += nv_rem;

 62:   while (i >0) {
 63:     sum0 = 0.;
 64:     sum1 = 0.;
 65:     sum2 = 0.;
 66:     sum3 = 0.;
 67:     VecGetArrayRead(yy[0],&yy0);
 68:     VecGetArrayRead(yy[1],&yy1);
 69:     VecGetArrayRead(yy[2],&yy2);
 70:     VecGetArrayRead(yy[3],&yy3);
 71:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 72:     VecRestoreArrayRead(yy[0],&yy0);
 73:     VecRestoreArrayRead(yy[1],&yy1);
 74:     VecRestoreArrayRead(yy[2],&yy2);
 75:     VecRestoreArrayRead(yy[3],&yy3);
 76:     yy  += 4;
 77:     z[0] = sum0;
 78:     z[1] = sum1;
 79:     z[2] = sum2;
 80:     z[3] = sum3;
 81:     z   += 4;
 82:     i   -= 4;
 83:   }
 84:   VecRestoreArrayRead(xin,&x);
 85:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 86:   return 0;
 87: }

 89: #else
 90: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 91: {
 92:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
 93:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
 94:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
 95:   Vec               *yy;

 97:   sum0 = 0.;
 98:   sum1 = 0.;
 99:   sum2 = 0.;

101:   i      = nv;
102:   nv_rem = nv&0x3;
103:   yy     = (Vec*)yin;
104:   j      = n;
105:   VecGetArrayRead(xin,&xbase);
106:   x      = xbase;

108:   switch (nv_rem) {
109:   case 3:
110:     VecGetArrayRead(yy[0],&yy0);
111:     VecGetArrayRead(yy[1],&yy1);
112:     VecGetArrayRead(yy[2],&yy2);
113:     switch (j_rem=j&0x3) {
114:     case 3:
115:       x2    = x[2];
116:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
117:       sum2 += x2*PetscConj(yy2[2]);
118:     case 2:
119:       x1    = x[1];
120:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
121:       sum2 += x1*PetscConj(yy2[1]);
122:     case 1:
123:       x0    = x[0];
124:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
125:       sum2 += x0*PetscConj(yy2[0]);
126:     case 0:
127:       x   += j_rem;
128:       yy0 += j_rem;
129:       yy1 += j_rem;
130:       yy2 += j_rem;
131:       j   -= j_rem;
132:       break;
133:     }
134:     while (j>0) {
135:       x0 = x[0];
136:       x1 = x[1];
137:       x2 = x[2];
138:       x3 = x[3];
139:       x += 4;

141:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
142:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
143:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
144:       j    -= 4;
145:     }
146:     z[0] = sum0;
147:     z[1] = sum1;
148:     z[2] = sum2;
149:     VecRestoreArrayRead(yy[0],&yy0);
150:     VecRestoreArrayRead(yy[1],&yy1);
151:     VecRestoreArrayRead(yy[2],&yy2);
152:     break;
153:   case 2:
154:     VecGetArrayRead(yy[0],&yy0);
155:     VecGetArrayRead(yy[1],&yy1);
156:     switch (j_rem=j&0x3) {
157:     case 3:
158:       x2    = x[2];
159:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
160:     case 2:
161:       x1    = x[1];
162:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
163:     case 1:
164:       x0    = x[0];
165:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
166:     case 0:
167:       x   += j_rem;
168:       yy0 += j_rem;
169:       yy1 += j_rem;
170:       j   -= j_rem;
171:       break;
172:     }
173:     while (j>0) {
174:       x0 = x[0];
175:       x1 = x[1];
176:       x2 = x[2];
177:       x3 = x[3];
178:       x += 4;

180:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
181:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
182:       j    -= 4;
183:     }
184:     z[0] = sum0;
185:     z[1] = sum1;

187:     VecRestoreArrayRead(yy[0],&yy0);
188:     VecRestoreArrayRead(yy[1],&yy1);
189:     break;
190:   case 1:
191:     VecGetArrayRead(yy[0],&yy0);
192:     switch (j_rem=j&0x3) {
193:     case 3:
194:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
195:     case 2:
196:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
197:     case 1:
198:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
199:     case 0:
200:       x   += j_rem;
201:       yy0 += j_rem;
202:       j   -= j_rem;
203:       break;
204:     }
205:     while (j>0) {
206:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
207:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
208:       yy0  +=4;
209:       j    -= 4; x+=4;
210:     }
211:     z[0] = sum0;

213:     VecRestoreArrayRead(yy[0],&yy0);
214:     break;
215:   case 0:
216:     break;
217:   }
218:   z  += nv_rem;
219:   i  -= nv_rem;
220:   yy += nv_rem;

222:   while (i >0) {
223:     sum0 = 0.;
224:     sum1 = 0.;
225:     sum2 = 0.;
226:     sum3 = 0.;
227:     VecGetArrayRead(yy[0],&yy0);
228:     VecGetArrayRead(yy[1],&yy1);
229:     VecGetArrayRead(yy[2],&yy2);
230:     VecGetArrayRead(yy[3],&yy3);

232:     j = n;
233:     x = xbase;
234:     switch (j_rem=j&0x3) {
235:     case 3:
236:       x2    = x[2];
237:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
238:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
239:     case 2:
240:       x1    = x[1];
241:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
242:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
243:     case 1:
244:       x0    = x[0];
245:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
246:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
247:     case 0:
248:       x   += j_rem;
249:       yy0 += j_rem;
250:       yy1 += j_rem;
251:       yy2 += j_rem;
252:       yy3 += j_rem;
253:       j   -= j_rem;
254:       break;
255:     }
256:     while (j>0) {
257:       x0 = x[0];
258:       x1 = x[1];
259:       x2 = x[2];
260:       x3 = x[3];
261:       x += 4;

263:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
264:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
265:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
266:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
267:       j    -= 4;
268:     }
269:     z[0] = sum0;
270:     z[1] = sum1;
271:     z[2] = sum2;
272:     z[3] = sum3;
273:     z   += 4;
274:     i   -= 4;
275:     VecRestoreArrayRead(yy[0],&yy0);
276:     VecRestoreArrayRead(yy[1],&yy1);
277:     VecRestoreArrayRead(yy[2],&yy2);
278:     VecRestoreArrayRead(yy[3],&yy3);
279:     yy  += 4;
280:   }
281:   VecRestoreArrayRead(xin,&xbase);
282:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
283:   return 0;
284: }
285: #endif

287: /* ----------------------------------------------------------------------------*/
288: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
289: {
290:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
291:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
292:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
293:   Vec               *yy;

295:   sum0 = 0.;
296:   sum1 = 0.;
297:   sum2 = 0.;

299:   i      = nv;
300:   nv_rem = nv&0x3;
301:   yy     = (Vec*)yin;
302:   j      = n;
303:   VecGetArrayRead(xin,&xbase);
304:   x      = xbase;

306:   switch (nv_rem) {
307:   case 3:
308:     VecGetArrayRead(yy[0],&yy0);
309:     VecGetArrayRead(yy[1],&yy1);
310:     VecGetArrayRead(yy[2],&yy2);
311:     switch (j_rem=j&0x3) {
312:     case 3:
313:       x2    = x[2];
314:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
315:       sum2 += x2*yy2[2];
316:     case 2:
317:       x1    = x[1];
318:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
319:       sum2 += x1*yy2[1];
320:     case 1:
321:       x0    = x[0];
322:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
323:       sum2 += x0*yy2[0];
324:     case 0:
325:       x   += j_rem;
326:       yy0 += j_rem;
327:       yy1 += j_rem;
328:       yy2 += j_rem;
329:       j   -= j_rem;
330:       break;
331:     }
332:     while (j>0) {
333:       x0 = x[0];
334:       x1 = x[1];
335:       x2 = x[2];
336:       x3 = x[3];
337:       x += 4;

339:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
340:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
341:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
342:       j    -= 4;
343:     }
344:     z[0] = sum0;
345:     z[1] = sum1;
346:     z[2] = sum2;
347:     VecRestoreArrayRead(yy[0],&yy0);
348:     VecRestoreArrayRead(yy[1],&yy1);
349:     VecRestoreArrayRead(yy[2],&yy2);
350:     break;
351:   case 2:
352:     VecGetArrayRead(yy[0],&yy0);
353:     VecGetArrayRead(yy[1],&yy1);
354:     switch (j_rem=j&0x3) {
355:     case 3:
356:       x2    = x[2];
357:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
358:     case 2:
359:       x1    = x[1];
360:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
361:     case 1:
362:       x0    = x[0];
363:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
364:     case 0:
365:       x   += j_rem;
366:       yy0 += j_rem;
367:       yy1 += j_rem;
368:       j   -= j_rem;
369:       break;
370:     }
371:     while (j>0) {
372:       x0 = x[0];
373:       x1 = x[1];
374:       x2 = x[2];
375:       x3 = x[3];
376:       x += 4;

378:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
379:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
380:       j    -= 4;
381:     }
382:     z[0] = sum0;
383:     z[1] = sum1;

385:     VecRestoreArrayRead(yy[0],&yy0);
386:     VecRestoreArrayRead(yy[1],&yy1);
387:     break;
388:   case 1:
389:     VecGetArrayRead(yy[0],&yy0);
390:     switch (j_rem=j&0x3) {
391:     case 3:
392:       x2 = x[2]; sum0 += x2*yy0[2];
393:     case 2:
394:       x1 = x[1]; sum0 += x1*yy0[1];
395:     case 1:
396:       x0 = x[0]; sum0 += x0*yy0[0];
397:     case 0:
398:       x   += j_rem;
399:       yy0 += j_rem;
400:       j   -= j_rem;
401:       break;
402:     }
403:     while (j>0) {
404:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
405:       j    -= 4; x+=4;
406:     }
407:     z[0] = sum0;

409:     VecRestoreArrayRead(yy[0],&yy0);
410:     break;
411:   case 0:
412:     break;
413:   }
414:   z  += nv_rem;
415:   i  -= nv_rem;
416:   yy += nv_rem;

418:   while (i >0) {
419:     sum0 = 0.;
420:     sum1 = 0.;
421:     sum2 = 0.;
422:     sum3 = 0.;
423:     VecGetArrayRead(yy[0],&yy0);
424:     VecGetArrayRead(yy[1],&yy1);
425:     VecGetArrayRead(yy[2],&yy2);
426:     VecGetArrayRead(yy[3],&yy3);
427:     x    = xbase;

429:     j = n;
430:     switch (j_rem=j&0x3) {
431:     case 3:
432:       x2    = x[2];
433:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
434:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
435:     case 2:
436:       x1    = x[1];
437:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
438:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
439:     case 1:
440:       x0    = x[0];
441:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
442:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
443:     case 0:
444:       x   += j_rem;
445:       yy0 += j_rem;
446:       yy1 += j_rem;
447:       yy2 += j_rem;
448:       yy3 += j_rem;
449:       j   -= j_rem;
450:       break;
451:     }
452:     while (j>0) {
453:       x0 = x[0];
454:       x1 = x[1];
455:       x2 = x[2];
456:       x3 = x[3];
457:       x += 4;

459:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
460:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
461:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
462:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
463:       j    -= 4;
464:     }
465:     z[0] = sum0;
466:     z[1] = sum1;
467:     z[2] = sum2;
468:     z[3] = sum3;
469:     z   += 4;
470:     i   -= 4;
471:     VecRestoreArrayRead(yy[0],&yy0);
472:     VecRestoreArrayRead(yy[1],&yy1);
473:     VecRestoreArrayRead(yy[2],&yy2);
474:     VecRestoreArrayRead(yy[3],&yy3);
475:     yy  += 4;
476:   }
477:   VecRestoreArrayRead(xin,&xbase);
478:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
479:   return 0;
480: }

482: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
483: {
484:   PetscInt          i,j=0,n = xin->map->n;
485:   PetscReal         max,tmp;
486:   const PetscScalar *xx;

488:   VecGetArrayRead(xin,&xx);
489:   if (!n) {
490:     max = PETSC_MIN_REAL;
491:     j   = -1;
492:   } else {
493:     max = PetscRealPart(*xx++); j = 0;
494:     for (i=1; i<n; i++) {
495:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
496:     }
497:   }
498:   *z = max;
499:   if (idx) *idx = j;
500:   VecRestoreArrayRead(xin,&xx);
501:   return 0;
502: }

504: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
505: {
506:   PetscInt          i,j=0,n = xin->map->n;
507:   PetscReal         min,tmp;
508:   const PetscScalar *xx;

510:   VecGetArrayRead(xin,&xx);
511:   if (!n) {
512:     min = PETSC_MAX_REAL;
513:     j   = -1;
514:   } else {
515:     min = PetscRealPart(*xx++); j = 0;
516:     for (i=1; i<n; i++) {
517:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
518:     }
519:   }
520:   *z = min;
521:   if (idx) *idx = j;
522:   VecRestoreArrayRead(xin,&xx);
523:   return 0;
524: }

526: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
527: {
528:   PetscInt       i,n = xin->map->n;
529:   PetscScalar    *xx;

531:   VecGetArrayWrite(xin,&xx);
532:   if (alpha == (PetscScalar)0.0) {
533:     PetscArrayzero(xx,n);
534:   } else {
535:     for (i=0; i<n; i++) xx[i] = alpha;
536:   }
537:   VecRestoreArrayWrite(xin,&xx);
538:   return 0;
539: }

541: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
542: {
543:   PetscInt          n = xin->map->n,j,j_rem;
544:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
545:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

547: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
548: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
549: #endif

551:   PetscLogFlops(nv*2.0*n);
552:   VecGetArray(xin,&xx);
553:   switch (j_rem=nv&0x3) {
554:   case 3:
555:     VecGetArrayRead(y[0],&yy0);
556:     VecGetArrayRead(y[1],&yy1);
557:     VecGetArrayRead(y[2],&yy2);
558:     alpha0 = alpha[0];
559:     alpha1 = alpha[1];
560:     alpha2 = alpha[2];
561:     alpha += 3;
562:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
563:     VecRestoreArrayRead(y[0],&yy0);
564:     VecRestoreArrayRead(y[1],&yy1);
565:     VecRestoreArrayRead(y[2],&yy2);
566:     y   += 3;
567:     break;
568:   case 2:
569:     VecGetArrayRead(y[0],&yy0);
570:     VecGetArrayRead(y[1],&yy1);
571:     alpha0 = alpha[0];
572:     alpha1 = alpha[1];
573:     alpha +=2;
574:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
575:     VecRestoreArrayRead(y[0],&yy0);
576:     VecRestoreArrayRead(y[1],&yy1);
577:     y   +=2;
578:     break;
579:   case 1:
580:     VecGetArrayRead(y[0],&yy0);
581:     alpha0 = *alpha++;
582:     PetscKernelAXPY(xx,alpha0,yy0,n);
583:     VecRestoreArrayRead(y[0],&yy0);
584:     y   +=1;
585:     break;
586:   }
587:   for (j=j_rem; j<nv; j+=4) {
588:     VecGetArrayRead(y[0],&yy0);
589:     VecGetArrayRead(y[1],&yy1);
590:     VecGetArrayRead(y[2],&yy2);
591:     VecGetArrayRead(y[3],&yy3);
592:     alpha0 = alpha[0];
593:     alpha1 = alpha[1];
594:     alpha2 = alpha[2];
595:     alpha3 = alpha[3];
596:     alpha += 4;

598:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
599:     VecRestoreArrayRead(y[0],&yy0);
600:     VecRestoreArrayRead(y[1],&yy1);
601:     VecRestoreArrayRead(y[2],&yy2);
602:     VecRestoreArrayRead(y[3],&yy3);
603:     y   += 4;
604:   }
605:   VecRestoreArray(xin,&xx);
606:   return 0;
607: }

609: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

611: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
612: {
613:   PetscInt          n = yin->map->n;
614:   PetscScalar       *yy;
615:   const PetscScalar *xx;

617:   if (alpha == (PetscScalar)0.0) {
618:     VecCopy(xin,yin);
619:   } else if (alpha == (PetscScalar)1.0) {
620:     VecAXPY_Seq(yin,alpha,xin);
621:   } else if (alpha == (PetscScalar)-1.0) {
622:     PetscInt i;
623:     VecGetArrayRead(xin,&xx);
624:     VecGetArray(yin,&yy);

626:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

628:     VecRestoreArrayRead(xin,&xx);
629:     VecRestoreArray(yin,&yy);
630:     PetscLogFlops(1.0*n);
631:   } else {
632:     VecGetArrayRead(xin,&xx);
633:     VecGetArray(yin,&yy);
634: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
635:     {
636:       PetscScalar oalpha = alpha;
637:       fortranaypx_(&n,&oalpha,xx,yy);
638:     }
639: #else
640:     {
641:       PetscInt i;

643:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
644:     }
645: #endif
646:     VecRestoreArrayRead(xin,&xx);
647:     VecRestoreArray(yin,&yy);
648:     PetscLogFlops(2.0*n);
649:   }
650:   return 0;
651: }

653: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
654: /*
655:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
656:    to be slower than a regular C loop.  Hence,we do not include it.
657:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
658: */

660: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
661: {
662:   PetscInt           i,n = win->map->n;
663:   PetscScalar        *ww;
664:   const PetscScalar  *yy,*xx;

666:   VecGetArrayRead(xin,&xx);
667:   VecGetArrayRead(yin,&yy);
668:   VecGetArray(win,&ww);
669:   if (alpha == (PetscScalar)1.0) {
670:     PetscLogFlops(n);
671:     /* could call BLAS axpy after call to memcopy, but may be slower */
672:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
673:   } else if (alpha == (PetscScalar)-1.0) {
674:     PetscLogFlops(n);
675:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
676:   } else if (alpha == (PetscScalar)0.0) {
677:     PetscArraycpy(ww,yy,n);
678:   } else {
679:     PetscScalar oalpha = alpha;
680: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
681:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
682: #else
683:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
684: #endif
685:     PetscLogFlops(2.0*n);
686:   }
687:   VecRestoreArrayRead(xin,&xx);
688:   VecRestoreArrayRead(yin,&yy);
689:   VecRestoreArray(win,&ww);
690:   return 0;
691: }

693: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
694: {
695:   PetscInt          n = xin->map->n,i;
696:   const PetscScalar *xx,*yy;
697:   PetscReal         m = 0.0;

699:   VecGetArrayRead(xin,&xx);
700:   VecGetArrayRead(yin,&yy);
701:   for (i = 0; i < n; i++) {
702:     if (yy[i] != (PetscScalar)0.0) {
703:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
704:     } else {
705:       m = PetscMax(PetscAbsScalar(xx[i]), m);
706:     }
707:   }
708:   VecRestoreArrayRead(xin,&xx);
709:   VecRestoreArrayRead(yin,&yy);
710:   MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
711:   PetscLogFlops(n);
712:   return 0;
713: }

715: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
716: {
717:   Vec_Seq *v = (Vec_Seq*)vin->data;

720:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
721:   v->array         = (PetscScalar*)a;
722:   return 0;
723: }

725: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
726: {
727:   Vec_Seq        *v = (Vec_Seq*)vin->data;

729:   PetscFree(v->array_allocated);
730:   v->array_allocated = v->array = (PetscScalar*)a;
731:   return 0;
732: }