Actual source code: dvec2.c


  2: /*
  3:    Defines some vector operation functions that are shared by
  4:    sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc/private/kernels/petscaxpy.h>

  9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 11: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 12: {
 13:   PetscErrorCode    ierr;
 14:   PetscInt          i,nv_rem,n = xin->map->n;
 15:   PetscScalar       sum0,sum1,sum2,sum3;
 16:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 17:   Vec               *yy;

 20:   sum0 = 0.0;
 21:   sum1 = 0.0;
 22:   sum2 = 0.0;

 24:   i      = nv;
 25:   nv_rem = nv&0x3;
 26:   yy     = (Vec*)yin;
 27:   VecGetArrayRead(xin,&x);

 29:   switch (nv_rem) {
 30:   case 3:
 31:     VecGetArrayRead(yy[0],&yy0);
 32:     VecGetArrayRead(yy[1],&yy1);
 33:     VecGetArrayRead(yy[2],&yy2);
 34:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 35:     VecRestoreArrayRead(yy[0],&yy0);
 36:     VecRestoreArrayRead(yy[1],&yy1);
 37:     VecRestoreArrayRead(yy[2],&yy2);
 38:     z[0] = sum0;
 39:     z[1] = sum1;
 40:     z[2] = sum2;
 41:     break;
 42:   case 2:
 43:     VecGetArrayRead(yy[0],&yy0);
 44:     VecGetArrayRead(yy[1],&yy1);
 45:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 46:     VecRestoreArrayRead(yy[0],&yy0);
 47:     VecRestoreArrayRead(yy[1],&yy1);
 48:     z[0] = sum0;
 49:     z[1] = sum1;
 50:     break;
 51:   case 1:
 52:     VecGetArrayRead(yy[0],&yy0);
 53:     fortranmdot1_(x,yy0,&n,&sum0);
 54:     VecRestoreArrayRead(yy[0],&yy0);
 55:     z[0] = sum0;
 56:     break;
 57:   case 0:
 58:     break;
 59:   }
 60:   z  += nv_rem;
 61:   i  -= nv_rem;
 62:   yy += nv_rem;

 64:   while (i >0) {
 65:     sum0 = 0.;
 66:     sum1 = 0.;
 67:     sum2 = 0.;
 68:     sum3 = 0.;
 69:     VecGetArrayRead(yy[0],&yy0);
 70:     VecGetArrayRead(yy[1],&yy1);
 71:     VecGetArrayRead(yy[2],&yy2);
 72:     VecGetArrayRead(yy[3],&yy3);
 73:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 74:     VecRestoreArrayRead(yy[0],&yy0);
 75:     VecRestoreArrayRead(yy[1],&yy1);
 76:     VecRestoreArrayRead(yy[2],&yy2);
 77:     VecRestoreArrayRead(yy[3],&yy3);
 78:     yy  += 4;
 79:     z[0] = sum0;
 80:     z[1] = sum1;
 81:     z[2] = sum2;
 82:     z[3] = sum3;
 83:     z   += 4;
 84:     i   -= 4;
 85:   }
 86:   VecRestoreArrayRead(xin,&x);
 87:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 88:   return(0);
 89: }

 91: #else
 92: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 93: {
 94:   PetscErrorCode    ierr;
 95:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
 96:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
 97:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
 98:   Vec               *yy;

101:   sum0 = 0.;
102:   sum1 = 0.;
103:   sum2 = 0.;

105:   i      = nv;
106:   nv_rem = nv&0x3;
107:   yy     = (Vec*)yin;
108:   j      = n;
109:   VecGetArrayRead(xin,&xbase);
110:   x      = xbase;

112:   switch (nv_rem) {
113:   case 3:
114:     VecGetArrayRead(yy[0],&yy0);
115:     VecGetArrayRead(yy[1],&yy1);
116:     VecGetArrayRead(yy[2],&yy2);
117:     switch (j_rem=j&0x3) {
118:     case 3:
119:       x2    = x[2];
120:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
121:       sum2 += x2*PetscConj(yy2[2]);
122:     case 2:
123:       x1    = x[1];
124:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
125:       sum2 += x1*PetscConj(yy2[1]);
126:     case 1:
127:       x0    = x[0];
128:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
129:       sum2 += x0*PetscConj(yy2[0]);
130:     case 0:
131:       x   += j_rem;
132:       yy0 += j_rem;
133:       yy1 += j_rem;
134:       yy2 += j_rem;
135:       j   -= j_rem;
136:       break;
137:     }
138:     while (j>0) {
139:       x0 = x[0];
140:       x1 = x[1];
141:       x2 = x[2];
142:       x3 = x[3];
143:       x += 4;

145:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
146:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
147:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
148:       j    -= 4;
149:     }
150:     z[0] = sum0;
151:     z[1] = sum1;
152:     z[2] = sum2;
153:     VecRestoreArrayRead(yy[0],&yy0);
154:     VecRestoreArrayRead(yy[1],&yy1);
155:     VecRestoreArrayRead(yy[2],&yy2);
156:     break;
157:   case 2:
158:     VecGetArrayRead(yy[0],&yy0);
159:     VecGetArrayRead(yy[1],&yy1);
160:     switch (j_rem=j&0x3) {
161:     case 3:
162:       x2    = x[2];
163:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
164:     case 2:
165:       x1    = x[1];
166:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
167:     case 1:
168:       x0    = x[0];
169:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
170:     case 0:
171:       x   += j_rem;
172:       yy0 += j_rem;
173:       yy1 += j_rem;
174:       j   -= j_rem;
175:       break;
176:     }
177:     while (j>0) {
178:       x0 = x[0];
179:       x1 = x[1];
180:       x2 = x[2];
181:       x3 = x[3];
182:       x += 4;

184:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
185:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
186:       j    -= 4;
187:     }
188:     z[0] = sum0;
189:     z[1] = sum1;

191:     VecRestoreArrayRead(yy[0],&yy0);
192:     VecRestoreArrayRead(yy[1],&yy1);
193:     break;
194:   case 1:
195:     VecGetArrayRead(yy[0],&yy0);
196:     switch (j_rem=j&0x3) {
197:     case 3:
198:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
199:     case 2:
200:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
201:     case 1:
202:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
203:     case 0:
204:       x   += j_rem;
205:       yy0 += j_rem;
206:       j   -= j_rem;
207:       break;
208:     }
209:     while (j>0) {
210:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
211:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
212:       yy0  +=4;
213:       j    -= 4; x+=4;
214:     }
215:     z[0] = sum0;

217:     VecRestoreArrayRead(yy[0],&yy0);
218:     break;
219:   case 0:
220:     break;
221:   }
222:   z  += nv_rem;
223:   i  -= nv_rem;
224:   yy += nv_rem;

226:   while (i >0) {
227:     sum0 = 0.;
228:     sum1 = 0.;
229:     sum2 = 0.;
230:     sum3 = 0.;
231:     VecGetArrayRead(yy[0],&yy0);
232:     VecGetArrayRead(yy[1],&yy1);
233:     VecGetArrayRead(yy[2],&yy2);
234:     VecGetArrayRead(yy[3],&yy3);

236:     j = n;
237:     x = xbase;
238:     switch (j_rem=j&0x3) {
239:     case 3:
240:       x2    = x[2];
241:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
242:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
243:     case 2:
244:       x1    = x[1];
245:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
246:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
247:     case 1:
248:       x0    = x[0];
249:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
250:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
251:     case 0:
252:       x   += j_rem;
253:       yy0 += j_rem;
254:       yy1 += j_rem;
255:       yy2 += j_rem;
256:       yy3 += j_rem;
257:       j   -= j_rem;
258:       break;
259:     }
260:     while (j>0) {
261:       x0 = x[0];
262:       x1 = x[1];
263:       x2 = x[2];
264:       x3 = x[3];
265:       x += 4;

267:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
268:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
269:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
270:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
271:       j    -= 4;
272:     }
273:     z[0] = sum0;
274:     z[1] = sum1;
275:     z[2] = sum2;
276:     z[3] = sum3;
277:     z   += 4;
278:     i   -= 4;
279:     VecRestoreArrayRead(yy[0],&yy0);
280:     VecRestoreArrayRead(yy[1],&yy1);
281:     VecRestoreArrayRead(yy[2],&yy2);
282:     VecRestoreArrayRead(yy[3],&yy3);
283:     yy  += 4;
284:   }
285:   VecRestoreArrayRead(xin,&xbase);
286:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
287:   return(0);
288: }
289: #endif

291: /* ----------------------------------------------------------------------------*/
292: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
293: {
294:   PetscErrorCode    ierr;
295:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
296:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
297:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
298:   Vec               *yy;

301:   sum0 = 0.;
302:   sum1 = 0.;
303:   sum2 = 0.;

305:   i      = nv;
306:   nv_rem = nv&0x3;
307:   yy     = (Vec*)yin;
308:   j      = n;
309:   VecGetArrayRead(xin,&xbase);
310:   x      = xbase;

312:   switch (nv_rem) {
313:   case 3:
314:     VecGetArrayRead(yy[0],&yy0);
315:     VecGetArrayRead(yy[1],&yy1);
316:     VecGetArrayRead(yy[2],&yy2);
317:     switch (j_rem=j&0x3) {
318:     case 3:
319:       x2    = x[2];
320:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
321:       sum2 += x2*yy2[2];
322:     case 2:
323:       x1    = x[1];
324:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
325:       sum2 += x1*yy2[1];
326:     case 1:
327:       x0    = x[0];
328:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
329:       sum2 += x0*yy2[0];
330:     case 0:
331:       x   += j_rem;
332:       yy0 += j_rem;
333:       yy1 += j_rem;
334:       yy2 += j_rem;
335:       j   -= j_rem;
336:       break;
337:     }
338:     while (j>0) {
339:       x0 = x[0];
340:       x1 = x[1];
341:       x2 = x[2];
342:       x3 = x[3];
343:       x += 4;

345:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
346:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
347:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
348:       j    -= 4;
349:     }
350:     z[0] = sum0;
351:     z[1] = sum1;
352:     z[2] = sum2;
353:     VecRestoreArrayRead(yy[0],&yy0);
354:     VecRestoreArrayRead(yy[1],&yy1);
355:     VecRestoreArrayRead(yy[2],&yy2);
356:     break;
357:   case 2:
358:     VecGetArrayRead(yy[0],&yy0);
359:     VecGetArrayRead(yy[1],&yy1);
360:     switch (j_rem=j&0x3) {
361:     case 3:
362:       x2    = x[2];
363:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
364:     case 2:
365:       x1    = x[1];
366:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
367:     case 1:
368:       x0    = x[0];
369:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
370:     case 0:
371:       x   += j_rem;
372:       yy0 += j_rem;
373:       yy1 += j_rem;
374:       j   -= j_rem;
375:       break;
376:     }
377:     while (j>0) {
378:       x0 = x[0];
379:       x1 = x[1];
380:       x2 = x[2];
381:       x3 = x[3];
382:       x += 4;

384:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
385:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
386:       j    -= 4;
387:     }
388:     z[0] = sum0;
389:     z[1] = sum1;

391:     VecRestoreArrayRead(yy[0],&yy0);
392:     VecRestoreArrayRead(yy[1],&yy1);
393:     break;
394:   case 1:
395:     VecGetArrayRead(yy[0],&yy0);
396:     switch (j_rem=j&0x3) {
397:     case 3:
398:       x2 = x[2]; sum0 += x2*yy0[2];
399:     case 2:
400:       x1 = x[1]; sum0 += x1*yy0[1];
401:     case 1:
402:       x0 = x[0]; sum0 += x0*yy0[0];
403:     case 0:
404:       x   += j_rem;
405:       yy0 += j_rem;
406:       j   -= j_rem;
407:       break;
408:     }
409:     while (j>0) {
410:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
411:       j    -= 4; x+=4;
412:     }
413:     z[0] = sum0;

415:     VecRestoreArrayRead(yy[0],&yy0);
416:     break;
417:   case 0:
418:     break;
419:   }
420:   z  += nv_rem;
421:   i  -= nv_rem;
422:   yy += nv_rem;

424:   while (i >0) {
425:     sum0 = 0.;
426:     sum1 = 0.;
427:     sum2 = 0.;
428:     sum3 = 0.;
429:     VecGetArrayRead(yy[0],&yy0);
430:     VecGetArrayRead(yy[1],&yy1);
431:     VecGetArrayRead(yy[2],&yy2);
432:     VecGetArrayRead(yy[3],&yy3);
433:     x    = xbase;

435:     j = n;
436:     switch (j_rem=j&0x3) {
437:     case 3:
438:       x2    = x[2];
439:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
440:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
441:     case 2:
442:       x1    = x[1];
443:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
444:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
445:     case 1:
446:       x0    = x[0];
447:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
448:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
449:     case 0:
450:       x   += j_rem;
451:       yy0 += j_rem;
452:       yy1 += j_rem;
453:       yy2 += j_rem;
454:       yy3 += j_rem;
455:       j   -= j_rem;
456:       break;
457:     }
458:     while (j>0) {
459:       x0 = x[0];
460:       x1 = x[1];
461:       x2 = x[2];
462:       x3 = x[3];
463:       x += 4;

465:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
466:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
467:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
468:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
469:       j    -= 4;
470:     }
471:     z[0] = sum0;
472:     z[1] = sum1;
473:     z[2] = sum2;
474:     z[3] = sum3;
475:     z   += 4;
476:     i   -= 4;
477:     VecRestoreArrayRead(yy[0],&yy0);
478:     VecRestoreArrayRead(yy[1],&yy1);
479:     VecRestoreArrayRead(yy[2],&yy2);
480:     VecRestoreArrayRead(yy[3],&yy3);
481:     yy  += 4;
482:   }
483:   VecRestoreArrayRead(xin,&xbase);
484:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
485:   return(0);
486: }

488: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
489: {
490:   PetscInt          i,j=0,n = xin->map->n;
491:   PetscReal         max,tmp;
492:   const PetscScalar *xx;
493:   PetscErrorCode    ierr;

496:   VecGetArrayRead(xin,&xx);
497:   if (!n) {
498:     max = PETSC_MIN_REAL;
499:     j   = -1;
500:   } else {
501:     max = PetscRealPart(*xx++); j = 0;
502:     for (i=1; i<n; i++) {
503:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
504:     }
505:   }
506:   *z = max;
507:   if (idx) *idx = j;
508:   VecRestoreArrayRead(xin,&xx);
509:   return(0);
510: }

512: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
513: {
514:   PetscInt          i,j=0,n = xin->map->n;
515:   PetscReal         min,tmp;
516:   const PetscScalar *xx;
517:   PetscErrorCode    ierr;

520:   VecGetArrayRead(xin,&xx);
521:   if (!n) {
522:     min = PETSC_MAX_REAL;
523:     j   = -1;
524:   } else {
525:     min = PetscRealPart(*xx++); j = 0;
526:     for (i=1; i<n; i++) {
527:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
528:     }
529:   }
530:   *z = min;
531:   if (idx) *idx = j;
532:   VecRestoreArrayRead(xin,&xx);
533:   return(0);
534: }

536: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
537: {
538:   PetscInt       i,n = xin->map->n;
539:   PetscScalar    *xx;

543:   VecGetArrayWrite(xin,&xx);
544:   if (alpha == (PetscScalar)0.0) {
545:     PetscArrayzero(xx,n);
546:   } else {
547:     for (i=0; i<n; i++) xx[i] = alpha;
548:   }
549:   VecRestoreArrayWrite(xin,&xx);
550:   return(0);
551: }

553: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
554: {
555:   PetscErrorCode    ierr;
556:   PetscInt          n = xin->map->n,j,j_rem;
557:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
558:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

560: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
561: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
562: #endif

565:   PetscLogFlops(nv*2.0*n);
566:   VecGetArray(xin,&xx);
567:   switch (j_rem=nv&0x3) {
568:   case 3:
569:     VecGetArrayRead(y[0],&yy0);
570:     VecGetArrayRead(y[1],&yy1);
571:     VecGetArrayRead(y[2],&yy2);
572:     alpha0 = alpha[0];
573:     alpha1 = alpha[1];
574:     alpha2 = alpha[2];
575:     alpha += 3;
576:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
577:     VecRestoreArrayRead(y[0],&yy0);
578:     VecRestoreArrayRead(y[1],&yy1);
579:     VecRestoreArrayRead(y[2],&yy2);
580:     y   += 3;
581:     break;
582:   case 2:
583:     VecGetArrayRead(y[0],&yy0);
584:     VecGetArrayRead(y[1],&yy1);
585:     alpha0 = alpha[0];
586:     alpha1 = alpha[1];
587:     alpha +=2;
588:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
589:     VecRestoreArrayRead(y[0],&yy0);
590:     VecRestoreArrayRead(y[1],&yy1);
591:     y   +=2;
592:     break;
593:   case 1:
594:     VecGetArrayRead(y[0],&yy0);
595:     alpha0 = *alpha++;
596:     PetscKernelAXPY(xx,alpha0,yy0,n);
597:     VecRestoreArrayRead(y[0],&yy0);
598:     y   +=1;
599:     break;
600:   }
601:   for (j=j_rem; j<nv; j+=4) {
602:     VecGetArrayRead(y[0],&yy0);
603:     VecGetArrayRead(y[1],&yy1);
604:     VecGetArrayRead(y[2],&yy2);
605:     VecGetArrayRead(y[3],&yy3);
606:     alpha0 = alpha[0];
607:     alpha1 = alpha[1];
608:     alpha2 = alpha[2];
609:     alpha3 = alpha[3];
610:     alpha += 4;

612:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
613:     VecRestoreArrayRead(y[0],&yy0);
614:     VecRestoreArrayRead(y[1],&yy1);
615:     VecRestoreArrayRead(y[2],&yy2);
616:     VecRestoreArrayRead(y[3],&yy3);
617:     y   += 4;
618:   }
619:   VecRestoreArray(xin,&xx);
620:   return(0);
621: }

623: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

625: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
626: {
627:   PetscErrorCode    ierr;
628:   PetscInt          n = yin->map->n;
629:   PetscScalar       *yy;
630:   const PetscScalar *xx;

633:   if (alpha == (PetscScalar)0.0) {
634:     VecCopy(xin,yin);
635:   } else if (alpha == (PetscScalar)1.0) {
636:     VecAXPY_Seq(yin,alpha,xin);
637:   } else if (alpha == (PetscScalar)-1.0) {
638:     PetscInt i;
639:     VecGetArrayRead(xin,&xx);
640:     VecGetArray(yin,&yy);

642:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

644:     VecRestoreArrayRead(xin,&xx);
645:     VecRestoreArray(yin,&yy);
646:     PetscLogFlops(1.0*n);
647:   } else {
648:     VecGetArrayRead(xin,&xx);
649:     VecGetArray(yin,&yy);
650: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
651:     {
652:       PetscScalar oalpha = alpha;
653:       fortranaypx_(&n,&oalpha,xx,yy);
654:     }
655: #else
656:     {
657:       PetscInt i;

659:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
660:     }
661: #endif
662:     VecRestoreArrayRead(xin,&xx);
663:     VecRestoreArray(yin,&yy);
664:     PetscLogFlops(2.0*n);
665:   }
666:   return(0);
667: }

669: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
670: /*
671:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
672:    to be slower than a regular C loop.  Hence,we do not include it.
673:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
674: */

676: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
677: {
678:   PetscErrorCode     ierr;
679:   PetscInt           i,n = win->map->n;
680:   PetscScalar        *ww;
681:   const PetscScalar  *yy,*xx;

684:   VecGetArrayRead(xin,&xx);
685:   VecGetArrayRead(yin,&yy);
686:   VecGetArray(win,&ww);
687:   if (alpha == (PetscScalar)1.0) {
688:     PetscLogFlops(n);
689:     /* could call BLAS axpy after call to memcopy, but may be slower */
690:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
691:   } else if (alpha == (PetscScalar)-1.0) {
692:     PetscLogFlops(n);
693:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
694:   } else if (alpha == (PetscScalar)0.0) {
695:     PetscArraycpy(ww,yy,n);
696:   } else {
697:     PetscScalar oalpha = alpha;
698: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
699:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
700: #else
701:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
702: #endif
703:     PetscLogFlops(2.0*n);
704:   }
705:   VecRestoreArrayRead(xin,&xx);
706:   VecRestoreArrayRead(yin,&yy);
707:   VecRestoreArray(win,&ww);
708:   return(0);
709: }

711: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
712: {
713:   PetscErrorCode    ierr;
714:   PetscInt          n = xin->map->n,i;
715:   const PetscScalar *xx,*yy;
716:   PetscReal         m = 0.0;

719:   VecGetArrayRead(xin,&xx);
720:   VecGetArrayRead(yin,&yy);
721:   for (i = 0; i < n; i++) {
722:     if (yy[i] != (PetscScalar)0.0) {
723:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
724:     } else {
725:       m = PetscMax(PetscAbsScalar(xx[i]), m);
726:     }
727:   }
728:   VecRestoreArrayRead(xin,&xx);
729:   VecRestoreArrayRead(yin,&yy);
730:   MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
731:   PetscLogFlops(n);
732:   return(0);
733: }

735: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
736: {
737:   Vec_Seq *v = (Vec_Seq*)vin->data;

740:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
741:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
742:   v->array         = (PetscScalar*)a;
743:   return(0);
744: }

746: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
747: {
748:   Vec_Seq        *v = (Vec_Seq*)vin->data;

752:   PetscFree(v->array_allocated);
753:   v->array_allocated = v->array = (PetscScalar*)a;
754:   return(0);
755: }