Actual source code: maij.c


  2: #include <../src/mat/impls/maij/maij.h>
  3: #include <../src/mat/utils/freespace.h>

  5: /*@
  6:    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  8:    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 10:    Input Parameter:
 11: .  A - the `MATMAIJ` matrix

 13:    Output Parameter:
 14: .  B - the `MATAIJ` matrix

 16:    Level: advanced

 18:    Note:
 19:     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

 21: .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
 22: @*/
 23: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
 24: {
 25:   PetscBool ismpimaij, isseqmaij;

 27:   PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij);
 28:   PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij);
 29:   if (ismpimaij) {
 30:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

 32:     *B = b->A;
 33:   } else if (isseqmaij) {
 34:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 36:     *B = b->AIJ;
 37:   } else {
 38:     *B = A;
 39:   }
 40:   return 0;
 41: }

 43: /*@
 44:    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

 46:    Logically Collective

 48:    Input Parameters:
 49: +  A - the `MATMAIJ` matrix
 50: -  dof - the block size for the new matrix

 52:    Output Parameter:
 53: .  B - the new `MATMAIJ` matrix

 55:    Level: advanced

 57: .seealso: `MATMAIJ`, `MatCreateMAIJ()`
 58: @*/
 59: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
 60: {
 61:   Mat Aij = NULL;

 64:   MatMAIJGetAIJ(A, &Aij);
 65:   MatCreateMAIJ(Aij, dof, B);
 66:   return 0;
 67: }

 69: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 70: {
 71:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 73:   MatDestroy(&b->AIJ);
 74:   PetscFree(A->data);
 75:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL);
 76:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL);
 77:   PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL);
 78:   return 0;
 79: }

 81: PetscErrorCode MatSetUp_MAIJ(Mat A)
 82: {
 83:   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
 84: }

 86: PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
 87: {
 88:   Mat B;

 90:   MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
 91:   MatView(B, viewer);
 92:   MatDestroy(&B);
 93:   return 0;
 94: }

 96: PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
 97: {
 98:   Mat B;

100:   MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B);
101:   MatView(B, viewer);
102:   MatDestroy(&B);
103:   return 0;
104: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
108:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

110:   MatDestroy(&b->AIJ);
111:   MatDestroy(&b->OAIJ);
112:   MatDestroy(&b->A);
113:   VecScatterDestroy(&b->ctx);
114:   VecDestroy(&b->w);
115:   PetscFree(A->data);
116:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL);
117:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL);
118:   PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL);
119:   PetscObjectChangeTypeName((PetscObject)A, NULL);
120:   return 0;
121: }

123: /*MC
124:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
125:   multicomponent problems, interpolating or restricting each component the same way independently.
126:   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

128:   Operations provided:
129: .vb
130:     MatMult()
131:     MatMultTranspose()
132:     MatMultAdd()
133:     MatMultTransposeAdd()
134: .ve

136:   Level: advanced

138: .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
139: M*/

141: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
142: {
143:   Mat_MPIMAIJ *b;
144:   PetscMPIInt  size;

146:   PetscNew(&b);
147:   A->data = (void *)b;

149:   PetscMemzero(A->ops, sizeof(struct _MatOps));

151:   A->ops->setup = MatSetUp_MAIJ;

153:   b->AIJ  = NULL;
154:   b->dof  = 0;
155:   b->OAIJ = NULL;
156:   b->ctx  = NULL;
157:   b->w    = NULL;
158:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
159:   if (size == 1) {
160:     PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ);
161:   } else {
162:     PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ);
163:   }
164:   A->preallocated = PETSC_TRUE;
165:   A->assembled    = PETSC_TRUE;
166:   return 0;
167: }

169: /* --------------------------------------------------------------------------------------*/
170: PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
171: {
172:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
173:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
174:   const PetscScalar *x, *v;
175:   PetscScalar       *y, sum1, sum2;
176:   PetscInt           nonzerorow = 0, n, i, jrow, j;
177:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;

179:   VecGetArrayRead(xx, &x);
180:   VecGetArray(yy, &y);
181:   idx = a->j;
182:   v   = a->a;
183:   ii  = a->i;

185:   for (i = 0; i < m; i++) {
186:     jrow = ii[i];
187:     n    = ii[i + 1] - jrow;
188:     sum1 = 0.0;
189:     sum2 = 0.0;

191:     nonzerorow += (n > 0);
192:     for (j = 0; j < n; j++) {
193:       sum1 += v[jrow] * x[2 * idx[jrow]];
194:       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
195:       jrow++;
196:     }
197:     y[2 * i]     = sum1;
198:     y[2 * i + 1] = sum2;
199:   }

201:   PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow);
202:   VecRestoreArrayRead(xx, &x);
203:   VecRestoreArray(yy, &y);
204:   return 0;
205: }

207: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
208: {
209:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
210:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
211:   const PetscScalar *x, *v;
212:   PetscScalar       *y, alpha1, alpha2;
213:   PetscInt           n, i;
214:   const PetscInt     m = b->AIJ->rmap->n, *idx;

216:   VecSet(yy, 0.0);
217:   VecGetArrayRead(xx, &x);
218:   VecGetArray(yy, &y);

220:   for (i = 0; i < m; i++) {
221:     idx    = a->j + a->i[i];
222:     v      = a->a + a->i[i];
223:     n      = a->i[i + 1] - a->i[i];
224:     alpha1 = x[2 * i];
225:     alpha2 = x[2 * i + 1];
226:     while (n-- > 0) {
227:       y[2 * (*idx)] += alpha1 * (*v);
228:       y[2 * (*idx) + 1] += alpha2 * (*v);
229:       idx++;
230:       v++;
231:     }
232:   }
233:   PetscLogFlops(4.0 * a->nz);
234:   VecRestoreArrayRead(xx, &x);
235:   VecRestoreArray(yy, &y);
236:   return 0;
237: }

239: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
240: {
241:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
242:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
243:   const PetscScalar *x, *v;
244:   PetscScalar       *y, sum1, sum2;
245:   PetscInt           n, i, jrow, j;
246:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;

248:   if (yy != zz) VecCopy(yy, zz);
249:   VecGetArrayRead(xx, &x);
250:   VecGetArray(zz, &y);
251:   idx = a->j;
252:   v   = a->a;
253:   ii  = a->i;

255:   for (i = 0; i < m; i++) {
256:     jrow = ii[i];
257:     n    = ii[i + 1] - jrow;
258:     sum1 = 0.0;
259:     sum2 = 0.0;
260:     for (j = 0; j < n; j++) {
261:       sum1 += v[jrow] * x[2 * idx[jrow]];
262:       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
263:       jrow++;
264:     }
265:     y[2 * i] += sum1;
266:     y[2 * i + 1] += sum2;
267:   }

269:   PetscLogFlops(4.0 * a->nz);
270:   VecRestoreArrayRead(xx, &x);
271:   VecRestoreArray(zz, &y);
272:   return 0;
273: }
274: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
275: {
276:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
277:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
278:   const PetscScalar *x, *v;
279:   PetscScalar       *y, alpha1, alpha2;
280:   PetscInt           n, i;
281:   const PetscInt     m = b->AIJ->rmap->n, *idx;

283:   if (yy != zz) VecCopy(yy, zz);
284:   VecGetArrayRead(xx, &x);
285:   VecGetArray(zz, &y);

287:   for (i = 0; i < m; i++) {
288:     idx    = a->j + a->i[i];
289:     v      = a->a + a->i[i];
290:     n      = a->i[i + 1] - a->i[i];
291:     alpha1 = x[2 * i];
292:     alpha2 = x[2 * i + 1];
293:     while (n-- > 0) {
294:       y[2 * (*idx)] += alpha1 * (*v);
295:       y[2 * (*idx) + 1] += alpha2 * (*v);
296:       idx++;
297:       v++;
298:     }
299:   }
300:   PetscLogFlops(4.0 * a->nz);
301:   VecRestoreArrayRead(xx, &x);
302:   VecRestoreArray(zz, &y);
303:   return 0;
304: }
305: /* --------------------------------------------------------------------------------------*/
306: PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
307: {
308:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
309:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
310:   const PetscScalar *x, *v;
311:   PetscScalar       *y, sum1, sum2, sum3;
312:   PetscInt           nonzerorow = 0, n, i, jrow, j;
313:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;

315:   VecGetArrayRead(xx, &x);
316:   VecGetArray(yy, &y);
317:   idx = a->j;
318:   v   = a->a;
319:   ii  = a->i;

321:   for (i = 0; i < m; i++) {
322:     jrow = ii[i];
323:     n    = ii[i + 1] - jrow;
324:     sum1 = 0.0;
325:     sum2 = 0.0;
326:     sum3 = 0.0;

328:     nonzerorow += (n > 0);
329:     for (j = 0; j < n; j++) {
330:       sum1 += v[jrow] * x[3 * idx[jrow]];
331:       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
332:       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
333:       jrow++;
334:     }
335:     y[3 * i]     = sum1;
336:     y[3 * i + 1] = sum2;
337:     y[3 * i + 2] = sum3;
338:   }

340:   PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow);
341:   VecRestoreArrayRead(xx, &x);
342:   VecRestoreArray(yy, &y);
343:   return 0;
344: }

346: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
347: {
348:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
349:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
350:   const PetscScalar *x, *v;
351:   PetscScalar       *y, alpha1, alpha2, alpha3;
352:   PetscInt           n, i;
353:   const PetscInt     m = b->AIJ->rmap->n, *idx;

355:   VecSet(yy, 0.0);
356:   VecGetArrayRead(xx, &x);
357:   VecGetArray(yy, &y);

359:   for (i = 0; i < m; i++) {
360:     idx    = a->j + a->i[i];
361:     v      = a->a + a->i[i];
362:     n      = a->i[i + 1] - a->i[i];
363:     alpha1 = x[3 * i];
364:     alpha2 = x[3 * i + 1];
365:     alpha3 = x[3 * i + 2];
366:     while (n-- > 0) {
367:       y[3 * (*idx)] += alpha1 * (*v);
368:       y[3 * (*idx) + 1] += alpha2 * (*v);
369:       y[3 * (*idx) + 2] += alpha3 * (*v);
370:       idx++;
371:       v++;
372:     }
373:   }
374:   PetscLogFlops(6.0 * a->nz);
375:   VecRestoreArrayRead(xx, &x);
376:   VecRestoreArray(yy, &y);
377:   return 0;
378: }

380: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
381: {
382:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
383:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
384:   const PetscScalar *x, *v;
385:   PetscScalar       *y, sum1, sum2, sum3;
386:   PetscInt           n, i, jrow, j;
387:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;

389:   if (yy != zz) VecCopy(yy, zz);
390:   VecGetArrayRead(xx, &x);
391:   VecGetArray(zz, &y);
392:   idx = a->j;
393:   v   = a->a;
394:   ii  = a->i;

396:   for (i = 0; i < m; i++) {
397:     jrow = ii[i];
398:     n    = ii[i + 1] - jrow;
399:     sum1 = 0.0;
400:     sum2 = 0.0;
401:     sum3 = 0.0;
402:     for (j = 0; j < n; j++) {
403:       sum1 += v[jrow] * x[3 * idx[jrow]];
404:       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
405:       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
406:       jrow++;
407:     }
408:     y[3 * i] += sum1;
409:     y[3 * i + 1] += sum2;
410:     y[3 * i + 2] += sum3;
411:   }

413:   PetscLogFlops(6.0 * a->nz);
414:   VecRestoreArrayRead(xx, &x);
415:   VecRestoreArray(zz, &y);
416:   return 0;
417: }
418: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
419: {
420:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
421:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
422:   const PetscScalar *x, *v;
423:   PetscScalar       *y, alpha1, alpha2, alpha3;
424:   PetscInt           n, i;
425:   const PetscInt     m = b->AIJ->rmap->n, *idx;

427:   if (yy != zz) VecCopy(yy, zz);
428:   VecGetArrayRead(xx, &x);
429:   VecGetArray(zz, &y);
430:   for (i = 0; i < m; i++) {
431:     idx    = a->j + a->i[i];
432:     v      = a->a + a->i[i];
433:     n      = a->i[i + 1] - a->i[i];
434:     alpha1 = x[3 * i];
435:     alpha2 = x[3 * i + 1];
436:     alpha3 = x[3 * i + 2];
437:     while (n-- > 0) {
438:       y[3 * (*idx)] += alpha1 * (*v);
439:       y[3 * (*idx) + 1] += alpha2 * (*v);
440:       y[3 * (*idx) + 2] += alpha3 * (*v);
441:       idx++;
442:       v++;
443:     }
444:   }
445:   PetscLogFlops(6.0 * a->nz);
446:   VecRestoreArrayRead(xx, &x);
447:   VecRestoreArray(zz, &y);
448:   return 0;
449: }

451: /* ------------------------------------------------------------------------------*/
452: PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
453: {
454:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
455:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
456:   const PetscScalar *x, *v;
457:   PetscScalar       *y, sum1, sum2, sum3, sum4;
458:   PetscInt           nonzerorow = 0, n, i, jrow, j;
459:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;

461:   VecGetArrayRead(xx, &x);
462:   VecGetArray(yy, &y);
463:   idx = a->j;
464:   v   = a->a;
465:   ii  = a->i;

467:   for (i = 0; i < m; i++) {
468:     jrow = ii[i];
469:     n    = ii[i + 1] - jrow;
470:     sum1 = 0.0;
471:     sum2 = 0.0;
472:     sum3 = 0.0;
473:     sum4 = 0.0;
474:     nonzerorow += (n > 0);
475:     for (j = 0; j < n; j++) {
476:       sum1 += v[jrow] * x[4 * idx[jrow]];
477:       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
478:       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
479:       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
480:       jrow++;
481:     }
482:     y[4 * i]     = sum1;
483:     y[4 * i + 1] = sum2;
484:     y[4 * i + 2] = sum3;
485:     y[4 * i + 3] = sum4;
486:   }

488:   PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow);
489:   VecRestoreArrayRead(xx, &x);
490:   VecRestoreArray(yy, &y);
491:   return 0;
492: }

494: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
495: {
496:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
497:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
498:   const PetscScalar *x, *v;
499:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
500:   PetscInt           n, i;
501:   const PetscInt     m = b->AIJ->rmap->n, *idx;

503:   VecSet(yy, 0.0);
504:   VecGetArrayRead(xx, &x);
505:   VecGetArray(yy, &y);
506:   for (i = 0; i < m; i++) {
507:     idx    = a->j + a->i[i];
508:     v      = a->a + a->i[i];
509:     n      = a->i[i + 1] - a->i[i];
510:     alpha1 = x[4 * i];
511:     alpha2 = x[4 * i + 1];
512:     alpha3 = x[4 * i + 2];
513:     alpha4 = x[4 * i + 3];
514:     while (n-- > 0) {
515:       y[4 * (*idx)] += alpha1 * (*v);
516:       y[4 * (*idx) + 1] += alpha2 * (*v);
517:       y[4 * (*idx) + 2] += alpha3 * (*v);
518:       y[4 * (*idx) + 3] += alpha4 * (*v);
519:       idx++;
520:       v++;
521:     }
522:   }
523:   PetscLogFlops(8.0 * a->nz);
524:   VecRestoreArrayRead(xx, &x);
525:   VecRestoreArray(yy, &y);
526:   return 0;
527: }

529: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
530: {
531:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
532:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
533:   const PetscScalar *x, *v;
534:   PetscScalar       *y, sum1, sum2, sum3, sum4;
535:   PetscInt           n, i, jrow, j;
536:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;

538:   if (yy != zz) VecCopy(yy, zz);
539:   VecGetArrayRead(xx, &x);
540:   VecGetArray(zz, &y);
541:   idx = a->j;
542:   v   = a->a;
543:   ii  = a->i;

545:   for (i = 0; i < m; i++) {
546:     jrow = ii[i];
547:     n    = ii[i + 1] - jrow;
548:     sum1 = 0.0;
549:     sum2 = 0.0;
550:     sum3 = 0.0;
551:     sum4 = 0.0;
552:     for (j = 0; j < n; j++) {
553:       sum1 += v[jrow] * x[4 * idx[jrow]];
554:       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
555:       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
556:       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
557:       jrow++;
558:     }
559:     y[4 * i] += sum1;
560:     y[4 * i + 1] += sum2;
561:     y[4 * i + 2] += sum3;
562:     y[4 * i + 3] += sum4;
563:   }

565:   PetscLogFlops(8.0 * a->nz);
566:   VecRestoreArrayRead(xx, &x);
567:   VecRestoreArray(zz, &y);
568:   return 0;
569: }
570: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
571: {
572:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
573:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
574:   const PetscScalar *x, *v;
575:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
576:   PetscInt           n, i;
577:   const PetscInt     m = b->AIJ->rmap->n, *idx;

579:   if (yy != zz) VecCopy(yy, zz);
580:   VecGetArrayRead(xx, &x);
581:   VecGetArray(zz, &y);

583:   for (i = 0; i < m; i++) {
584:     idx    = a->j + a->i[i];
585:     v      = a->a + a->i[i];
586:     n      = a->i[i + 1] - a->i[i];
587:     alpha1 = x[4 * i];
588:     alpha2 = x[4 * i + 1];
589:     alpha3 = x[4 * i + 2];
590:     alpha4 = x[4 * i + 3];
591:     while (n-- > 0) {
592:       y[4 * (*idx)] += alpha1 * (*v);
593:       y[4 * (*idx) + 1] += alpha2 * (*v);
594:       y[4 * (*idx) + 2] += alpha3 * (*v);
595:       y[4 * (*idx) + 3] += alpha4 * (*v);
596:       idx++;
597:       v++;
598:     }
599:   }
600:   PetscLogFlops(8.0 * a->nz);
601:   VecRestoreArrayRead(xx, &x);
602:   VecRestoreArray(zz, &y);
603:   return 0;
604: }
605: /* ------------------------------------------------------------------------------*/

607: PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
608: {
609:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
610:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
611:   const PetscScalar *x, *v;
612:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
613:   PetscInt           nonzerorow = 0, n, i, jrow, j;
614:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;

616:   VecGetArrayRead(xx, &x);
617:   VecGetArray(yy, &y);
618:   idx = a->j;
619:   v   = a->a;
620:   ii  = a->i;

622:   for (i = 0; i < m; i++) {
623:     jrow = ii[i];
624:     n    = ii[i + 1] - jrow;
625:     sum1 = 0.0;
626:     sum2 = 0.0;
627:     sum3 = 0.0;
628:     sum4 = 0.0;
629:     sum5 = 0.0;

631:     nonzerorow += (n > 0);
632:     for (j = 0; j < n; j++) {
633:       sum1 += v[jrow] * x[5 * idx[jrow]];
634:       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
635:       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
636:       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
637:       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
638:       jrow++;
639:     }
640:     y[5 * i]     = sum1;
641:     y[5 * i + 1] = sum2;
642:     y[5 * i + 2] = sum3;
643:     y[5 * i + 3] = sum4;
644:     y[5 * i + 4] = sum5;
645:   }

647:   PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow);
648:   VecRestoreArrayRead(xx, &x);
649:   VecRestoreArray(yy, &y);
650:   return 0;
651: }

653: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
654: {
655:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
656:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
657:   const PetscScalar *x, *v;
658:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
659:   PetscInt           n, i;
660:   const PetscInt     m = b->AIJ->rmap->n, *idx;

662:   VecSet(yy, 0.0);
663:   VecGetArrayRead(xx, &x);
664:   VecGetArray(yy, &y);

666:   for (i = 0; i < m; i++) {
667:     idx    = a->j + a->i[i];
668:     v      = a->a + a->i[i];
669:     n      = a->i[i + 1] - a->i[i];
670:     alpha1 = x[5 * i];
671:     alpha2 = x[5 * i + 1];
672:     alpha3 = x[5 * i + 2];
673:     alpha4 = x[5 * i + 3];
674:     alpha5 = x[5 * i + 4];
675:     while (n-- > 0) {
676:       y[5 * (*idx)] += alpha1 * (*v);
677:       y[5 * (*idx) + 1] += alpha2 * (*v);
678:       y[5 * (*idx) + 2] += alpha3 * (*v);
679:       y[5 * (*idx) + 3] += alpha4 * (*v);
680:       y[5 * (*idx) + 4] += alpha5 * (*v);
681:       idx++;
682:       v++;
683:     }
684:   }
685:   PetscLogFlops(10.0 * a->nz);
686:   VecRestoreArrayRead(xx, &x);
687:   VecRestoreArray(yy, &y);
688:   return 0;
689: }

691: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
692: {
693:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
694:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
695:   const PetscScalar *x, *v;
696:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
697:   PetscInt           n, i, jrow, j;
698:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;

700:   if (yy != zz) VecCopy(yy, zz);
701:   VecGetArrayRead(xx, &x);
702:   VecGetArray(zz, &y);
703:   idx = a->j;
704:   v   = a->a;
705:   ii  = a->i;

707:   for (i = 0; i < m; i++) {
708:     jrow = ii[i];
709:     n    = ii[i + 1] - jrow;
710:     sum1 = 0.0;
711:     sum2 = 0.0;
712:     sum3 = 0.0;
713:     sum4 = 0.0;
714:     sum5 = 0.0;
715:     for (j = 0; j < n; j++) {
716:       sum1 += v[jrow] * x[5 * idx[jrow]];
717:       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
718:       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
719:       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
720:       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
721:       jrow++;
722:     }
723:     y[5 * i] += sum1;
724:     y[5 * i + 1] += sum2;
725:     y[5 * i + 2] += sum3;
726:     y[5 * i + 3] += sum4;
727:     y[5 * i + 4] += sum5;
728:   }

730:   PetscLogFlops(10.0 * a->nz);
731:   VecRestoreArrayRead(xx, &x);
732:   VecRestoreArray(zz, &y);
733:   return 0;
734: }

736: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
737: {
738:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
739:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
740:   const PetscScalar *x, *v;
741:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
742:   PetscInt           n, i;
743:   const PetscInt     m = b->AIJ->rmap->n, *idx;

745:   if (yy != zz) VecCopy(yy, zz);
746:   VecGetArrayRead(xx, &x);
747:   VecGetArray(zz, &y);

749:   for (i = 0; i < m; i++) {
750:     idx    = a->j + a->i[i];
751:     v      = a->a + a->i[i];
752:     n      = a->i[i + 1] - a->i[i];
753:     alpha1 = x[5 * i];
754:     alpha2 = x[5 * i + 1];
755:     alpha3 = x[5 * i + 2];
756:     alpha4 = x[5 * i + 3];
757:     alpha5 = x[5 * i + 4];
758:     while (n-- > 0) {
759:       y[5 * (*idx)] += alpha1 * (*v);
760:       y[5 * (*idx) + 1] += alpha2 * (*v);
761:       y[5 * (*idx) + 2] += alpha3 * (*v);
762:       y[5 * (*idx) + 3] += alpha4 * (*v);
763:       y[5 * (*idx) + 4] += alpha5 * (*v);
764:       idx++;
765:       v++;
766:     }
767:   }
768:   PetscLogFlops(10.0 * a->nz);
769:   VecRestoreArrayRead(xx, &x);
770:   VecRestoreArray(zz, &y);
771:   return 0;
772: }

774: /* ------------------------------------------------------------------------------*/
775: PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
776: {
777:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
778:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
779:   const PetscScalar *x, *v;
780:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
781:   PetscInt           nonzerorow = 0, n, i, jrow, j;
782:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;

784:   VecGetArrayRead(xx, &x);
785:   VecGetArray(yy, &y);
786:   idx = a->j;
787:   v   = a->a;
788:   ii  = a->i;

790:   for (i = 0; i < m; i++) {
791:     jrow = ii[i];
792:     n    = ii[i + 1] - jrow;
793:     sum1 = 0.0;
794:     sum2 = 0.0;
795:     sum3 = 0.0;
796:     sum4 = 0.0;
797:     sum5 = 0.0;
798:     sum6 = 0.0;

800:     nonzerorow += (n > 0);
801:     for (j = 0; j < n; j++) {
802:       sum1 += v[jrow] * x[6 * idx[jrow]];
803:       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
804:       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
805:       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
806:       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
807:       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
808:       jrow++;
809:     }
810:     y[6 * i]     = sum1;
811:     y[6 * i + 1] = sum2;
812:     y[6 * i + 2] = sum3;
813:     y[6 * i + 3] = sum4;
814:     y[6 * i + 4] = sum5;
815:     y[6 * i + 5] = sum6;
816:   }

818:   PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow);
819:   VecRestoreArrayRead(xx, &x);
820:   VecRestoreArray(yy, &y);
821:   return 0;
822: }

824: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
825: {
826:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
827:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
828:   const PetscScalar *x, *v;
829:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
830:   PetscInt           n, i;
831:   const PetscInt     m = b->AIJ->rmap->n, *idx;

833:   VecSet(yy, 0.0);
834:   VecGetArrayRead(xx, &x);
835:   VecGetArray(yy, &y);

837:   for (i = 0; i < m; i++) {
838:     idx    = a->j + a->i[i];
839:     v      = a->a + a->i[i];
840:     n      = a->i[i + 1] - a->i[i];
841:     alpha1 = x[6 * i];
842:     alpha2 = x[6 * i + 1];
843:     alpha3 = x[6 * i + 2];
844:     alpha4 = x[6 * i + 3];
845:     alpha5 = x[6 * i + 4];
846:     alpha6 = x[6 * i + 5];
847:     while (n-- > 0) {
848:       y[6 * (*idx)] += alpha1 * (*v);
849:       y[6 * (*idx) + 1] += alpha2 * (*v);
850:       y[6 * (*idx) + 2] += alpha3 * (*v);
851:       y[6 * (*idx) + 3] += alpha4 * (*v);
852:       y[6 * (*idx) + 4] += alpha5 * (*v);
853:       y[6 * (*idx) + 5] += alpha6 * (*v);
854:       idx++;
855:       v++;
856:     }
857:   }
858:   PetscLogFlops(12.0 * a->nz);
859:   VecRestoreArrayRead(xx, &x);
860:   VecRestoreArray(yy, &y);
861:   return 0;
862: }

864: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
865: {
866:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
867:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
868:   const PetscScalar *x, *v;
869:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
870:   PetscInt           n, i, jrow, j;
871:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;

873:   if (yy != zz) VecCopy(yy, zz);
874:   VecGetArrayRead(xx, &x);
875:   VecGetArray(zz, &y);
876:   idx = a->j;
877:   v   = a->a;
878:   ii  = a->i;

880:   for (i = 0; i < m; i++) {
881:     jrow = ii[i];
882:     n    = ii[i + 1] - jrow;
883:     sum1 = 0.0;
884:     sum2 = 0.0;
885:     sum3 = 0.0;
886:     sum4 = 0.0;
887:     sum5 = 0.0;
888:     sum6 = 0.0;
889:     for (j = 0; j < n; j++) {
890:       sum1 += v[jrow] * x[6 * idx[jrow]];
891:       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
892:       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
893:       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
894:       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
895:       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
896:       jrow++;
897:     }
898:     y[6 * i] += sum1;
899:     y[6 * i + 1] += sum2;
900:     y[6 * i + 2] += sum3;
901:     y[6 * i + 3] += sum4;
902:     y[6 * i + 4] += sum5;
903:     y[6 * i + 5] += sum6;
904:   }

906:   PetscLogFlops(12.0 * a->nz);
907:   VecRestoreArrayRead(xx, &x);
908:   VecRestoreArray(zz, &y);
909:   return 0;
910: }

912: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
913: {
914:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
915:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
916:   const PetscScalar *x, *v;
917:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
918:   PetscInt           n, i;
919:   const PetscInt     m = b->AIJ->rmap->n, *idx;

921:   if (yy != zz) VecCopy(yy, zz);
922:   VecGetArrayRead(xx, &x);
923:   VecGetArray(zz, &y);

925:   for (i = 0; i < m; i++) {
926:     idx    = a->j + a->i[i];
927:     v      = a->a + a->i[i];
928:     n      = a->i[i + 1] - a->i[i];
929:     alpha1 = x[6 * i];
930:     alpha2 = x[6 * i + 1];
931:     alpha3 = x[6 * i + 2];
932:     alpha4 = x[6 * i + 3];
933:     alpha5 = x[6 * i + 4];
934:     alpha6 = x[6 * i + 5];
935:     while (n-- > 0) {
936:       y[6 * (*idx)] += alpha1 * (*v);
937:       y[6 * (*idx) + 1] += alpha2 * (*v);
938:       y[6 * (*idx) + 2] += alpha3 * (*v);
939:       y[6 * (*idx) + 3] += alpha4 * (*v);
940:       y[6 * (*idx) + 4] += alpha5 * (*v);
941:       y[6 * (*idx) + 5] += alpha6 * (*v);
942:       idx++;
943:       v++;
944:     }
945:   }
946:   PetscLogFlops(12.0 * a->nz);
947:   VecRestoreArrayRead(xx, &x);
948:   VecRestoreArray(zz, &y);
949:   return 0;
950: }

952: /* ------------------------------------------------------------------------------*/
953: PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
954: {
955:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
956:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
957:   const PetscScalar *x, *v;
958:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
959:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
960:   PetscInt           nonzerorow = 0, n, i, jrow, j;

962:   VecGetArrayRead(xx, &x);
963:   VecGetArray(yy, &y);
964:   idx = a->j;
965:   v   = a->a;
966:   ii  = a->i;

968:   for (i = 0; i < m; i++) {
969:     jrow = ii[i];
970:     n    = ii[i + 1] - jrow;
971:     sum1 = 0.0;
972:     sum2 = 0.0;
973:     sum3 = 0.0;
974:     sum4 = 0.0;
975:     sum5 = 0.0;
976:     sum6 = 0.0;
977:     sum7 = 0.0;

979:     nonzerorow += (n > 0);
980:     for (j = 0; j < n; j++) {
981:       sum1 += v[jrow] * x[7 * idx[jrow]];
982:       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
983:       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
984:       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
985:       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
986:       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
987:       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
988:       jrow++;
989:     }
990:     y[7 * i]     = sum1;
991:     y[7 * i + 1] = sum2;
992:     y[7 * i + 2] = sum3;
993:     y[7 * i + 3] = sum4;
994:     y[7 * i + 4] = sum5;
995:     y[7 * i + 5] = sum6;
996:     y[7 * i + 6] = sum7;
997:   }

999:   PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow);
1000:   VecRestoreArrayRead(xx, &x);
1001:   VecRestoreArray(yy, &y);
1002:   return 0;
1003: }

1005: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
1006: {
1007:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1008:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1009:   const PetscScalar *x, *v;
1010:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1011:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1012:   PetscInt           n, i;

1014:   VecSet(yy, 0.0);
1015:   VecGetArrayRead(xx, &x);
1016:   VecGetArray(yy, &y);

1018:   for (i = 0; i < m; i++) {
1019:     idx    = a->j + a->i[i];
1020:     v      = a->a + a->i[i];
1021:     n      = a->i[i + 1] - a->i[i];
1022:     alpha1 = x[7 * i];
1023:     alpha2 = x[7 * i + 1];
1024:     alpha3 = x[7 * i + 2];
1025:     alpha4 = x[7 * i + 3];
1026:     alpha5 = x[7 * i + 4];
1027:     alpha6 = x[7 * i + 5];
1028:     alpha7 = x[7 * i + 6];
1029:     while (n-- > 0) {
1030:       y[7 * (*idx)] += alpha1 * (*v);
1031:       y[7 * (*idx) + 1] += alpha2 * (*v);
1032:       y[7 * (*idx) + 2] += alpha3 * (*v);
1033:       y[7 * (*idx) + 3] += alpha4 * (*v);
1034:       y[7 * (*idx) + 4] += alpha5 * (*v);
1035:       y[7 * (*idx) + 5] += alpha6 * (*v);
1036:       y[7 * (*idx) + 6] += alpha7 * (*v);
1037:       idx++;
1038:       v++;
1039:     }
1040:   }
1041:   PetscLogFlops(14.0 * a->nz);
1042:   VecRestoreArrayRead(xx, &x);
1043:   VecRestoreArray(yy, &y);
1044:   return 0;
1045: }

1047: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1048: {
1049:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1050:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1051:   const PetscScalar *x, *v;
1052:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1053:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1054:   PetscInt           n, i, jrow, j;

1056:   if (yy != zz) VecCopy(yy, zz);
1057:   VecGetArrayRead(xx, &x);
1058:   VecGetArray(zz, &y);
1059:   idx = a->j;
1060:   v   = a->a;
1061:   ii  = a->i;

1063:   for (i = 0; i < m; i++) {
1064:     jrow = ii[i];
1065:     n    = ii[i + 1] - jrow;
1066:     sum1 = 0.0;
1067:     sum2 = 0.0;
1068:     sum3 = 0.0;
1069:     sum4 = 0.0;
1070:     sum5 = 0.0;
1071:     sum6 = 0.0;
1072:     sum7 = 0.0;
1073:     for (j = 0; j < n; j++) {
1074:       sum1 += v[jrow] * x[7 * idx[jrow]];
1075:       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1076:       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1077:       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1078:       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1079:       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1080:       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1081:       jrow++;
1082:     }
1083:     y[7 * i] += sum1;
1084:     y[7 * i + 1] += sum2;
1085:     y[7 * i + 2] += sum3;
1086:     y[7 * i + 3] += sum4;
1087:     y[7 * i + 4] += sum5;
1088:     y[7 * i + 5] += sum6;
1089:     y[7 * i + 6] += sum7;
1090:   }

1092:   PetscLogFlops(14.0 * a->nz);
1093:   VecRestoreArrayRead(xx, &x);
1094:   VecRestoreArray(zz, &y);
1095:   return 0;
1096: }

1098: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1099: {
1100:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1101:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1102:   const PetscScalar *x, *v;
1103:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1104:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1105:   PetscInt           n, i;

1107:   if (yy != zz) VecCopy(yy, zz);
1108:   VecGetArrayRead(xx, &x);
1109:   VecGetArray(zz, &y);
1110:   for (i = 0; i < m; i++) {
1111:     idx    = a->j + a->i[i];
1112:     v      = a->a + a->i[i];
1113:     n      = a->i[i + 1] - a->i[i];
1114:     alpha1 = x[7 * i];
1115:     alpha2 = x[7 * i + 1];
1116:     alpha3 = x[7 * i + 2];
1117:     alpha4 = x[7 * i + 3];
1118:     alpha5 = x[7 * i + 4];
1119:     alpha6 = x[7 * i + 5];
1120:     alpha7 = x[7 * i + 6];
1121:     while (n-- > 0) {
1122:       y[7 * (*idx)] += alpha1 * (*v);
1123:       y[7 * (*idx) + 1] += alpha2 * (*v);
1124:       y[7 * (*idx) + 2] += alpha3 * (*v);
1125:       y[7 * (*idx) + 3] += alpha4 * (*v);
1126:       y[7 * (*idx) + 4] += alpha5 * (*v);
1127:       y[7 * (*idx) + 5] += alpha6 * (*v);
1128:       y[7 * (*idx) + 6] += alpha7 * (*v);
1129:       idx++;
1130:       v++;
1131:     }
1132:   }
1133:   PetscLogFlops(14.0 * a->nz);
1134:   VecRestoreArrayRead(xx, &x);
1135:   VecRestoreArray(zz, &y);
1136:   return 0;
1137: }

1139: PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1140: {
1141:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1142:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1143:   const PetscScalar *x, *v;
1144:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1145:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1146:   PetscInt           nonzerorow = 0, n, i, jrow, j;

1148:   VecGetArrayRead(xx, &x);
1149:   VecGetArray(yy, &y);
1150:   idx = a->j;
1151:   v   = a->a;
1152:   ii  = a->i;

1154:   for (i = 0; i < m; i++) {
1155:     jrow = ii[i];
1156:     n    = ii[i + 1] - jrow;
1157:     sum1 = 0.0;
1158:     sum2 = 0.0;
1159:     sum3 = 0.0;
1160:     sum4 = 0.0;
1161:     sum5 = 0.0;
1162:     sum6 = 0.0;
1163:     sum7 = 0.0;
1164:     sum8 = 0.0;

1166:     nonzerorow += (n > 0);
1167:     for (j = 0; j < n; j++) {
1168:       sum1 += v[jrow] * x[8 * idx[jrow]];
1169:       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1170:       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1171:       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1172:       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1173:       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1174:       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1175:       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1176:       jrow++;
1177:     }
1178:     y[8 * i]     = sum1;
1179:     y[8 * i + 1] = sum2;
1180:     y[8 * i + 2] = sum3;
1181:     y[8 * i + 3] = sum4;
1182:     y[8 * i + 4] = sum5;
1183:     y[8 * i + 5] = sum6;
1184:     y[8 * i + 6] = sum7;
1185:     y[8 * i + 7] = sum8;
1186:   }

1188:   PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow);
1189:   VecRestoreArrayRead(xx, &x);
1190:   VecRestoreArray(yy, &y);
1191:   return 0;
1192: }

1194: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1195: {
1196:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1197:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1198:   const PetscScalar *x, *v;
1199:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1200:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1201:   PetscInt           n, i;

1203:   VecSet(yy, 0.0);
1204:   VecGetArrayRead(xx, &x);
1205:   VecGetArray(yy, &y);

1207:   for (i = 0; i < m; i++) {
1208:     idx    = a->j + a->i[i];
1209:     v      = a->a + a->i[i];
1210:     n      = a->i[i + 1] - a->i[i];
1211:     alpha1 = x[8 * i];
1212:     alpha2 = x[8 * i + 1];
1213:     alpha3 = x[8 * i + 2];
1214:     alpha4 = x[8 * i + 3];
1215:     alpha5 = x[8 * i + 4];
1216:     alpha6 = x[8 * i + 5];
1217:     alpha7 = x[8 * i + 6];
1218:     alpha8 = x[8 * i + 7];
1219:     while (n-- > 0) {
1220:       y[8 * (*idx)] += alpha1 * (*v);
1221:       y[8 * (*idx) + 1] += alpha2 * (*v);
1222:       y[8 * (*idx) + 2] += alpha3 * (*v);
1223:       y[8 * (*idx) + 3] += alpha4 * (*v);
1224:       y[8 * (*idx) + 4] += alpha5 * (*v);
1225:       y[8 * (*idx) + 5] += alpha6 * (*v);
1226:       y[8 * (*idx) + 6] += alpha7 * (*v);
1227:       y[8 * (*idx) + 7] += alpha8 * (*v);
1228:       idx++;
1229:       v++;
1230:     }
1231:   }
1232:   PetscLogFlops(16.0 * a->nz);
1233:   VecRestoreArrayRead(xx, &x);
1234:   VecRestoreArray(yy, &y);
1235:   return 0;
1236: }

1238: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1239: {
1240:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1241:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1242:   const PetscScalar *x, *v;
1243:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1244:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1245:   PetscInt           n, i, jrow, j;

1247:   if (yy != zz) VecCopy(yy, zz);
1248:   VecGetArrayRead(xx, &x);
1249:   VecGetArray(zz, &y);
1250:   idx = a->j;
1251:   v   = a->a;
1252:   ii  = a->i;

1254:   for (i = 0; i < m; i++) {
1255:     jrow = ii[i];
1256:     n    = ii[i + 1] - jrow;
1257:     sum1 = 0.0;
1258:     sum2 = 0.0;
1259:     sum3 = 0.0;
1260:     sum4 = 0.0;
1261:     sum5 = 0.0;
1262:     sum6 = 0.0;
1263:     sum7 = 0.0;
1264:     sum8 = 0.0;
1265:     for (j = 0; j < n; j++) {
1266:       sum1 += v[jrow] * x[8 * idx[jrow]];
1267:       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1268:       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1269:       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1270:       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1271:       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1272:       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1273:       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1274:       jrow++;
1275:     }
1276:     y[8 * i] += sum1;
1277:     y[8 * i + 1] += sum2;
1278:     y[8 * i + 2] += sum3;
1279:     y[8 * i + 3] += sum4;
1280:     y[8 * i + 4] += sum5;
1281:     y[8 * i + 5] += sum6;
1282:     y[8 * i + 6] += sum7;
1283:     y[8 * i + 7] += sum8;
1284:   }

1286:   PetscLogFlops(16.0 * a->nz);
1287:   VecRestoreArrayRead(xx, &x);
1288:   VecRestoreArray(zz, &y);
1289:   return 0;
1290: }

1292: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1293: {
1294:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1295:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1296:   const PetscScalar *x, *v;
1297:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1298:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1299:   PetscInt           n, i;

1301:   if (yy != zz) VecCopy(yy, zz);
1302:   VecGetArrayRead(xx, &x);
1303:   VecGetArray(zz, &y);
1304:   for (i = 0; i < m; i++) {
1305:     idx    = a->j + a->i[i];
1306:     v      = a->a + a->i[i];
1307:     n      = a->i[i + 1] - a->i[i];
1308:     alpha1 = x[8 * i];
1309:     alpha2 = x[8 * i + 1];
1310:     alpha3 = x[8 * i + 2];
1311:     alpha4 = x[8 * i + 3];
1312:     alpha5 = x[8 * i + 4];
1313:     alpha6 = x[8 * i + 5];
1314:     alpha7 = x[8 * i + 6];
1315:     alpha8 = x[8 * i + 7];
1316:     while (n-- > 0) {
1317:       y[8 * (*idx)] += alpha1 * (*v);
1318:       y[8 * (*idx) + 1] += alpha2 * (*v);
1319:       y[8 * (*idx) + 2] += alpha3 * (*v);
1320:       y[8 * (*idx) + 3] += alpha4 * (*v);
1321:       y[8 * (*idx) + 4] += alpha5 * (*v);
1322:       y[8 * (*idx) + 5] += alpha6 * (*v);
1323:       y[8 * (*idx) + 6] += alpha7 * (*v);
1324:       y[8 * (*idx) + 7] += alpha8 * (*v);
1325:       idx++;
1326:       v++;
1327:     }
1328:   }
1329:   PetscLogFlops(16.0 * a->nz);
1330:   VecRestoreArrayRead(xx, &x);
1331:   VecRestoreArray(zz, &y);
1332:   return 0;
1333: }

1335: /* ------------------------------------------------------------------------------*/
1336: PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1337: {
1338:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1339:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1340:   const PetscScalar *x, *v;
1341:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1342:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1343:   PetscInt           nonzerorow = 0, n, i, jrow, j;

1345:   VecGetArrayRead(xx, &x);
1346:   VecGetArray(yy, &y);
1347:   idx = a->j;
1348:   v   = a->a;
1349:   ii  = a->i;

1351:   for (i = 0; i < m; i++) {
1352:     jrow = ii[i];
1353:     n    = ii[i + 1] - jrow;
1354:     sum1 = 0.0;
1355:     sum2 = 0.0;
1356:     sum3 = 0.0;
1357:     sum4 = 0.0;
1358:     sum5 = 0.0;
1359:     sum6 = 0.0;
1360:     sum7 = 0.0;
1361:     sum8 = 0.0;
1362:     sum9 = 0.0;

1364:     nonzerorow += (n > 0);
1365:     for (j = 0; j < n; j++) {
1366:       sum1 += v[jrow] * x[9 * idx[jrow]];
1367:       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1368:       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1369:       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1370:       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1371:       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1372:       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1373:       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1374:       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1375:       jrow++;
1376:     }
1377:     y[9 * i]     = sum1;
1378:     y[9 * i + 1] = sum2;
1379:     y[9 * i + 2] = sum3;
1380:     y[9 * i + 3] = sum4;
1381:     y[9 * i + 4] = sum5;
1382:     y[9 * i + 5] = sum6;
1383:     y[9 * i + 6] = sum7;
1384:     y[9 * i + 7] = sum8;
1385:     y[9 * i + 8] = sum9;
1386:   }

1388:   PetscLogFlops(18.0 * a->nz - 9 * nonzerorow);
1389:   VecRestoreArrayRead(xx, &x);
1390:   VecRestoreArray(yy, &y);
1391:   return 0;
1392: }

1394: /* ------------------------------------------------------------------------------*/

1396: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1397: {
1398:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1399:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1400:   const PetscScalar *x, *v;
1401:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1402:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1403:   PetscInt           n, i;

1405:   VecSet(yy, 0.0);
1406:   VecGetArrayRead(xx, &x);
1407:   VecGetArray(yy, &y);

1409:   for (i = 0; i < m; i++) {
1410:     idx    = a->j + a->i[i];
1411:     v      = a->a + a->i[i];
1412:     n      = a->i[i + 1] - a->i[i];
1413:     alpha1 = x[9 * i];
1414:     alpha2 = x[9 * i + 1];
1415:     alpha3 = x[9 * i + 2];
1416:     alpha4 = x[9 * i + 3];
1417:     alpha5 = x[9 * i + 4];
1418:     alpha6 = x[9 * i + 5];
1419:     alpha7 = x[9 * i + 6];
1420:     alpha8 = x[9 * i + 7];
1421:     alpha9 = x[9 * i + 8];
1422:     while (n-- > 0) {
1423:       y[9 * (*idx)] += alpha1 * (*v);
1424:       y[9 * (*idx) + 1] += alpha2 * (*v);
1425:       y[9 * (*idx) + 2] += alpha3 * (*v);
1426:       y[9 * (*idx) + 3] += alpha4 * (*v);
1427:       y[9 * (*idx) + 4] += alpha5 * (*v);
1428:       y[9 * (*idx) + 5] += alpha6 * (*v);
1429:       y[9 * (*idx) + 6] += alpha7 * (*v);
1430:       y[9 * (*idx) + 7] += alpha8 * (*v);
1431:       y[9 * (*idx) + 8] += alpha9 * (*v);
1432:       idx++;
1433:       v++;
1434:     }
1435:   }
1436:   PetscLogFlops(18.0 * a->nz);
1437:   VecRestoreArrayRead(xx, &x);
1438:   VecRestoreArray(yy, &y);
1439:   return 0;
1440: }

1442: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1443: {
1444:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1445:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1446:   const PetscScalar *x, *v;
1447:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1448:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1449:   PetscInt           n, i, jrow, j;

1451:   if (yy != zz) VecCopy(yy, zz);
1452:   VecGetArrayRead(xx, &x);
1453:   VecGetArray(zz, &y);
1454:   idx = a->j;
1455:   v   = a->a;
1456:   ii  = a->i;

1458:   for (i = 0; i < m; i++) {
1459:     jrow = ii[i];
1460:     n    = ii[i + 1] - jrow;
1461:     sum1 = 0.0;
1462:     sum2 = 0.0;
1463:     sum3 = 0.0;
1464:     sum4 = 0.0;
1465:     sum5 = 0.0;
1466:     sum6 = 0.0;
1467:     sum7 = 0.0;
1468:     sum8 = 0.0;
1469:     sum9 = 0.0;
1470:     for (j = 0; j < n; j++) {
1471:       sum1 += v[jrow] * x[9 * idx[jrow]];
1472:       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1473:       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1474:       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1475:       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1476:       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1477:       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1478:       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1479:       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1480:       jrow++;
1481:     }
1482:     y[9 * i] += sum1;
1483:     y[9 * i + 1] += sum2;
1484:     y[9 * i + 2] += sum3;
1485:     y[9 * i + 3] += sum4;
1486:     y[9 * i + 4] += sum5;
1487:     y[9 * i + 5] += sum6;
1488:     y[9 * i + 6] += sum7;
1489:     y[9 * i + 7] += sum8;
1490:     y[9 * i + 8] += sum9;
1491:   }

1493:   PetscLogFlops(18.0 * a->nz);
1494:   VecRestoreArrayRead(xx, &x);
1495:   VecRestoreArray(zz, &y);
1496:   return 0;
1497: }

1499: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1500: {
1501:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1502:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1503:   const PetscScalar *x, *v;
1504:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1505:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1506:   PetscInt           n, i;

1508:   if (yy != zz) VecCopy(yy, zz);
1509:   VecGetArrayRead(xx, &x);
1510:   VecGetArray(zz, &y);
1511:   for (i = 0; i < m; i++) {
1512:     idx    = a->j + a->i[i];
1513:     v      = a->a + a->i[i];
1514:     n      = a->i[i + 1] - a->i[i];
1515:     alpha1 = x[9 * i];
1516:     alpha2 = x[9 * i + 1];
1517:     alpha3 = x[9 * i + 2];
1518:     alpha4 = x[9 * i + 3];
1519:     alpha5 = x[9 * i + 4];
1520:     alpha6 = x[9 * i + 5];
1521:     alpha7 = x[9 * i + 6];
1522:     alpha8 = x[9 * i + 7];
1523:     alpha9 = x[9 * i + 8];
1524:     while (n-- > 0) {
1525:       y[9 * (*idx)] += alpha1 * (*v);
1526:       y[9 * (*idx) + 1] += alpha2 * (*v);
1527:       y[9 * (*idx) + 2] += alpha3 * (*v);
1528:       y[9 * (*idx) + 3] += alpha4 * (*v);
1529:       y[9 * (*idx) + 4] += alpha5 * (*v);
1530:       y[9 * (*idx) + 5] += alpha6 * (*v);
1531:       y[9 * (*idx) + 6] += alpha7 * (*v);
1532:       y[9 * (*idx) + 7] += alpha8 * (*v);
1533:       y[9 * (*idx) + 8] += alpha9 * (*v);
1534:       idx++;
1535:       v++;
1536:     }
1537:   }
1538:   PetscLogFlops(18.0 * a->nz);
1539:   VecRestoreArrayRead(xx, &x);
1540:   VecRestoreArray(zz, &y);
1541:   return 0;
1542: }
1543: PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1544: {
1545:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1546:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1547:   const PetscScalar *x, *v;
1548:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1549:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1550:   PetscInt           nonzerorow = 0, n, i, jrow, j;

1552:   VecGetArrayRead(xx, &x);
1553:   VecGetArray(yy, &y);
1554:   idx = a->j;
1555:   v   = a->a;
1556:   ii  = a->i;

1558:   for (i = 0; i < m; i++) {
1559:     jrow  = ii[i];
1560:     n     = ii[i + 1] - jrow;
1561:     sum1  = 0.0;
1562:     sum2  = 0.0;
1563:     sum3  = 0.0;
1564:     sum4  = 0.0;
1565:     sum5  = 0.0;
1566:     sum6  = 0.0;
1567:     sum7  = 0.0;
1568:     sum8  = 0.0;
1569:     sum9  = 0.0;
1570:     sum10 = 0.0;

1572:     nonzerorow += (n > 0);
1573:     for (j = 0; j < n; j++) {
1574:       sum1 += v[jrow] * x[10 * idx[jrow]];
1575:       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1576:       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1577:       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1578:       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1579:       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1580:       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1581:       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1582:       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1583:       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1584:       jrow++;
1585:     }
1586:     y[10 * i]     = sum1;
1587:     y[10 * i + 1] = sum2;
1588:     y[10 * i + 2] = sum3;
1589:     y[10 * i + 3] = sum4;
1590:     y[10 * i + 4] = sum5;
1591:     y[10 * i + 5] = sum6;
1592:     y[10 * i + 6] = sum7;
1593:     y[10 * i + 7] = sum8;
1594:     y[10 * i + 8] = sum9;
1595:     y[10 * i + 9] = sum10;
1596:   }

1598:   PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow);
1599:   VecRestoreArrayRead(xx, &x);
1600:   VecRestoreArray(yy, &y);
1601:   return 0;
1602: }

1604: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1605: {
1606:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1607:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1608:   const PetscScalar *x, *v;
1609:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1611:   PetscInt           n, i, jrow, j;

1613:   if (yy != zz) VecCopy(yy, zz);
1614:   VecGetArrayRead(xx, &x);
1615:   VecGetArray(zz, &y);
1616:   idx = a->j;
1617:   v   = a->a;
1618:   ii  = a->i;

1620:   for (i = 0; i < m; i++) {
1621:     jrow  = ii[i];
1622:     n     = ii[i + 1] - jrow;
1623:     sum1  = 0.0;
1624:     sum2  = 0.0;
1625:     sum3  = 0.0;
1626:     sum4  = 0.0;
1627:     sum5  = 0.0;
1628:     sum6  = 0.0;
1629:     sum7  = 0.0;
1630:     sum8  = 0.0;
1631:     sum9  = 0.0;
1632:     sum10 = 0.0;
1633:     for (j = 0; j < n; j++) {
1634:       sum1 += v[jrow] * x[10 * idx[jrow]];
1635:       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1636:       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1637:       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1638:       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1639:       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1640:       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1641:       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1642:       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1643:       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1644:       jrow++;
1645:     }
1646:     y[10 * i] += sum1;
1647:     y[10 * i + 1] += sum2;
1648:     y[10 * i + 2] += sum3;
1649:     y[10 * i + 3] += sum4;
1650:     y[10 * i + 4] += sum5;
1651:     y[10 * i + 5] += sum6;
1652:     y[10 * i + 6] += sum7;
1653:     y[10 * i + 7] += sum8;
1654:     y[10 * i + 8] += sum9;
1655:     y[10 * i + 9] += sum10;
1656:   }

1658:   PetscLogFlops(20.0 * a->nz);
1659:   VecRestoreArrayRead(xx, &x);
1660:   VecRestoreArray(yy, &y);
1661:   return 0;
1662: }

1664: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1665: {
1666:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1667:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1668:   const PetscScalar *x, *v;
1669:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1670:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1671:   PetscInt           n, i;

1673:   VecSet(yy, 0.0);
1674:   VecGetArrayRead(xx, &x);
1675:   VecGetArray(yy, &y);

1677:   for (i = 0; i < m; i++) {
1678:     idx     = a->j + a->i[i];
1679:     v       = a->a + a->i[i];
1680:     n       = a->i[i + 1] - a->i[i];
1681:     alpha1  = x[10 * i];
1682:     alpha2  = x[10 * i + 1];
1683:     alpha3  = x[10 * i + 2];
1684:     alpha4  = x[10 * i + 3];
1685:     alpha5  = x[10 * i + 4];
1686:     alpha6  = x[10 * i + 5];
1687:     alpha7  = x[10 * i + 6];
1688:     alpha8  = x[10 * i + 7];
1689:     alpha9  = x[10 * i + 8];
1690:     alpha10 = x[10 * i + 9];
1691:     while (n-- > 0) {
1692:       y[10 * (*idx)] += alpha1 * (*v);
1693:       y[10 * (*idx) + 1] += alpha2 * (*v);
1694:       y[10 * (*idx) + 2] += alpha3 * (*v);
1695:       y[10 * (*idx) + 3] += alpha4 * (*v);
1696:       y[10 * (*idx) + 4] += alpha5 * (*v);
1697:       y[10 * (*idx) + 5] += alpha6 * (*v);
1698:       y[10 * (*idx) + 6] += alpha7 * (*v);
1699:       y[10 * (*idx) + 7] += alpha8 * (*v);
1700:       y[10 * (*idx) + 8] += alpha9 * (*v);
1701:       y[10 * (*idx) + 9] += alpha10 * (*v);
1702:       idx++;
1703:       v++;
1704:     }
1705:   }
1706:   PetscLogFlops(20.0 * a->nz);
1707:   VecRestoreArrayRead(xx, &x);
1708:   VecRestoreArray(yy, &y);
1709:   return 0;
1710: }

1712: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1713: {
1714:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1715:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1716:   const PetscScalar *x, *v;
1717:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1718:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1719:   PetscInt           n, i;

1721:   if (yy != zz) VecCopy(yy, zz);
1722:   VecGetArrayRead(xx, &x);
1723:   VecGetArray(zz, &y);
1724:   for (i = 0; i < m; i++) {
1725:     idx     = a->j + a->i[i];
1726:     v       = a->a + a->i[i];
1727:     n       = a->i[i + 1] - a->i[i];
1728:     alpha1  = x[10 * i];
1729:     alpha2  = x[10 * i + 1];
1730:     alpha3  = x[10 * i + 2];
1731:     alpha4  = x[10 * i + 3];
1732:     alpha5  = x[10 * i + 4];
1733:     alpha6  = x[10 * i + 5];
1734:     alpha7  = x[10 * i + 6];
1735:     alpha8  = x[10 * i + 7];
1736:     alpha9  = x[10 * i + 8];
1737:     alpha10 = x[10 * i + 9];
1738:     while (n-- > 0) {
1739:       y[10 * (*idx)] += alpha1 * (*v);
1740:       y[10 * (*idx) + 1] += alpha2 * (*v);
1741:       y[10 * (*idx) + 2] += alpha3 * (*v);
1742:       y[10 * (*idx) + 3] += alpha4 * (*v);
1743:       y[10 * (*idx) + 4] += alpha5 * (*v);
1744:       y[10 * (*idx) + 5] += alpha6 * (*v);
1745:       y[10 * (*idx) + 6] += alpha7 * (*v);
1746:       y[10 * (*idx) + 7] += alpha8 * (*v);
1747:       y[10 * (*idx) + 8] += alpha9 * (*v);
1748:       y[10 * (*idx) + 9] += alpha10 * (*v);
1749:       idx++;
1750:       v++;
1751:     }
1752:   }
1753:   PetscLogFlops(20.0 * a->nz);
1754:   VecRestoreArrayRead(xx, &x);
1755:   VecRestoreArray(zz, &y);
1756:   return 0;
1757: }

1759: /*--------------------------------------------------------------------------------------------*/
1760: PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1761: {
1762:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1763:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1764:   const PetscScalar *x, *v;
1765:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1766:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1767:   PetscInt           nonzerorow = 0, n, i, jrow, j;

1769:   VecGetArrayRead(xx, &x);
1770:   VecGetArray(yy, &y);
1771:   idx = a->j;
1772:   v   = a->a;
1773:   ii  = a->i;

1775:   for (i = 0; i < m; i++) {
1776:     jrow  = ii[i];
1777:     n     = ii[i + 1] - jrow;
1778:     sum1  = 0.0;
1779:     sum2  = 0.0;
1780:     sum3  = 0.0;
1781:     sum4  = 0.0;
1782:     sum5  = 0.0;
1783:     sum6  = 0.0;
1784:     sum7  = 0.0;
1785:     sum8  = 0.0;
1786:     sum9  = 0.0;
1787:     sum10 = 0.0;
1788:     sum11 = 0.0;

1790:     nonzerorow += (n > 0);
1791:     for (j = 0; j < n; j++) {
1792:       sum1 += v[jrow] * x[11 * idx[jrow]];
1793:       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1794:       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1795:       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1796:       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1797:       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1798:       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1799:       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1800:       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1801:       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1802:       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1803:       jrow++;
1804:     }
1805:     y[11 * i]      = sum1;
1806:     y[11 * i + 1]  = sum2;
1807:     y[11 * i + 2]  = sum3;
1808:     y[11 * i + 3]  = sum4;
1809:     y[11 * i + 4]  = sum5;
1810:     y[11 * i + 5]  = sum6;
1811:     y[11 * i + 6]  = sum7;
1812:     y[11 * i + 7]  = sum8;
1813:     y[11 * i + 8]  = sum9;
1814:     y[11 * i + 9]  = sum10;
1815:     y[11 * i + 10] = sum11;
1816:   }

1818:   PetscLogFlops(22.0 * a->nz - 11 * nonzerorow);
1819:   VecRestoreArrayRead(xx, &x);
1820:   VecRestoreArray(yy, &y);
1821:   return 0;
1822: }

1824: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1825: {
1826:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1827:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1828:   const PetscScalar *x, *v;
1829:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1830:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1831:   PetscInt           n, i, jrow, j;

1833:   if (yy != zz) VecCopy(yy, zz);
1834:   VecGetArrayRead(xx, &x);
1835:   VecGetArray(zz, &y);
1836:   idx = a->j;
1837:   v   = a->a;
1838:   ii  = a->i;

1840:   for (i = 0; i < m; i++) {
1841:     jrow  = ii[i];
1842:     n     = ii[i + 1] - jrow;
1843:     sum1  = 0.0;
1844:     sum2  = 0.0;
1845:     sum3  = 0.0;
1846:     sum4  = 0.0;
1847:     sum5  = 0.0;
1848:     sum6  = 0.0;
1849:     sum7  = 0.0;
1850:     sum8  = 0.0;
1851:     sum9  = 0.0;
1852:     sum10 = 0.0;
1853:     sum11 = 0.0;
1854:     for (j = 0; j < n; j++) {
1855:       sum1 += v[jrow] * x[11 * idx[jrow]];
1856:       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1857:       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1858:       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1859:       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1860:       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1861:       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1862:       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1863:       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1864:       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1865:       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1866:       jrow++;
1867:     }
1868:     y[11 * i] += sum1;
1869:     y[11 * i + 1] += sum2;
1870:     y[11 * i + 2] += sum3;
1871:     y[11 * i + 3] += sum4;
1872:     y[11 * i + 4] += sum5;
1873:     y[11 * i + 5] += sum6;
1874:     y[11 * i + 6] += sum7;
1875:     y[11 * i + 7] += sum8;
1876:     y[11 * i + 8] += sum9;
1877:     y[11 * i + 9] += sum10;
1878:     y[11 * i + 10] += sum11;
1879:   }

1881:   PetscLogFlops(22.0 * a->nz);
1882:   VecRestoreArrayRead(xx, &x);
1883:   VecRestoreArray(yy, &y);
1884:   return 0;
1885: }

1887: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1888: {
1889:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1890:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1891:   const PetscScalar *x, *v;
1892:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1893:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1894:   PetscInt           n, i;

1896:   VecSet(yy, 0.0);
1897:   VecGetArrayRead(xx, &x);
1898:   VecGetArray(yy, &y);

1900:   for (i = 0; i < m; i++) {
1901:     idx     = a->j + a->i[i];
1902:     v       = a->a + a->i[i];
1903:     n       = a->i[i + 1] - a->i[i];
1904:     alpha1  = x[11 * i];
1905:     alpha2  = x[11 * i + 1];
1906:     alpha3  = x[11 * i + 2];
1907:     alpha4  = x[11 * i + 3];
1908:     alpha5  = x[11 * i + 4];
1909:     alpha6  = x[11 * i + 5];
1910:     alpha7  = x[11 * i + 6];
1911:     alpha8  = x[11 * i + 7];
1912:     alpha9  = x[11 * i + 8];
1913:     alpha10 = x[11 * i + 9];
1914:     alpha11 = x[11 * i + 10];
1915:     while (n-- > 0) {
1916:       y[11 * (*idx)] += alpha1 * (*v);
1917:       y[11 * (*idx) + 1] += alpha2 * (*v);
1918:       y[11 * (*idx) + 2] += alpha3 * (*v);
1919:       y[11 * (*idx) + 3] += alpha4 * (*v);
1920:       y[11 * (*idx) + 4] += alpha5 * (*v);
1921:       y[11 * (*idx) + 5] += alpha6 * (*v);
1922:       y[11 * (*idx) + 6] += alpha7 * (*v);
1923:       y[11 * (*idx) + 7] += alpha8 * (*v);
1924:       y[11 * (*idx) + 8] += alpha9 * (*v);
1925:       y[11 * (*idx) + 9] += alpha10 * (*v);
1926:       y[11 * (*idx) + 10] += alpha11 * (*v);
1927:       idx++;
1928:       v++;
1929:     }
1930:   }
1931:   PetscLogFlops(22.0 * a->nz);
1932:   VecRestoreArrayRead(xx, &x);
1933:   VecRestoreArray(yy, &y);
1934:   return 0;
1935: }

1937: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1938: {
1939:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1940:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1941:   const PetscScalar *x, *v;
1942:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1943:   const PetscInt     m = b->AIJ->rmap->n, *idx;
1944:   PetscInt           n, i;

1946:   if (yy != zz) VecCopy(yy, zz);
1947:   VecGetArrayRead(xx, &x);
1948:   VecGetArray(zz, &y);
1949:   for (i = 0; i < m; i++) {
1950:     idx     = a->j + a->i[i];
1951:     v       = a->a + a->i[i];
1952:     n       = a->i[i + 1] - a->i[i];
1953:     alpha1  = x[11 * i];
1954:     alpha2  = x[11 * i + 1];
1955:     alpha3  = x[11 * i + 2];
1956:     alpha4  = x[11 * i + 3];
1957:     alpha5  = x[11 * i + 4];
1958:     alpha6  = x[11 * i + 5];
1959:     alpha7  = x[11 * i + 6];
1960:     alpha8  = x[11 * i + 7];
1961:     alpha9  = x[11 * i + 8];
1962:     alpha10 = x[11 * i + 9];
1963:     alpha11 = x[11 * i + 10];
1964:     while (n-- > 0) {
1965:       y[11 * (*idx)] += alpha1 * (*v);
1966:       y[11 * (*idx) + 1] += alpha2 * (*v);
1967:       y[11 * (*idx) + 2] += alpha3 * (*v);
1968:       y[11 * (*idx) + 3] += alpha4 * (*v);
1969:       y[11 * (*idx) + 4] += alpha5 * (*v);
1970:       y[11 * (*idx) + 5] += alpha6 * (*v);
1971:       y[11 * (*idx) + 6] += alpha7 * (*v);
1972:       y[11 * (*idx) + 7] += alpha8 * (*v);
1973:       y[11 * (*idx) + 8] += alpha9 * (*v);
1974:       y[11 * (*idx) + 9] += alpha10 * (*v);
1975:       y[11 * (*idx) + 10] += alpha11 * (*v);
1976:       idx++;
1977:       v++;
1978:     }
1979:   }
1980:   PetscLogFlops(22.0 * a->nz);
1981:   VecRestoreArrayRead(xx, &x);
1982:   VecRestoreArray(zz, &y);
1983:   return 0;
1984: }

1986: /*--------------------------------------------------------------------------------------------*/
1987: PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
1988: {
1989:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1990:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1991:   const PetscScalar *x, *v;
1992:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1993:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1994:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1995:   PetscInt           nonzerorow = 0, n, i, jrow, j;

1997:   VecGetArrayRead(xx, &x);
1998:   VecGetArray(yy, &y);
1999:   idx = a->j;
2000:   v   = a->a;
2001:   ii  = a->i;

2003:   for (i = 0; i < m; i++) {
2004:     jrow  = ii[i];
2005:     n     = ii[i + 1] - jrow;
2006:     sum1  = 0.0;
2007:     sum2  = 0.0;
2008:     sum3  = 0.0;
2009:     sum4  = 0.0;
2010:     sum5  = 0.0;
2011:     sum6  = 0.0;
2012:     sum7  = 0.0;
2013:     sum8  = 0.0;
2014:     sum9  = 0.0;
2015:     sum10 = 0.0;
2016:     sum11 = 0.0;
2017:     sum12 = 0.0;
2018:     sum13 = 0.0;
2019:     sum14 = 0.0;
2020:     sum15 = 0.0;
2021:     sum16 = 0.0;

2023:     nonzerorow += (n > 0);
2024:     for (j = 0; j < n; j++) {
2025:       sum1 += v[jrow] * x[16 * idx[jrow]];
2026:       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2027:       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2028:       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2029:       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2030:       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2031:       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2032:       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2033:       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2034:       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2035:       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2036:       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2037:       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2038:       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2039:       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2040:       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2041:       jrow++;
2042:     }
2043:     y[16 * i]      = sum1;
2044:     y[16 * i + 1]  = sum2;
2045:     y[16 * i + 2]  = sum3;
2046:     y[16 * i + 3]  = sum4;
2047:     y[16 * i + 4]  = sum5;
2048:     y[16 * i + 5]  = sum6;
2049:     y[16 * i + 6]  = sum7;
2050:     y[16 * i + 7]  = sum8;
2051:     y[16 * i + 8]  = sum9;
2052:     y[16 * i + 9]  = sum10;
2053:     y[16 * i + 10] = sum11;
2054:     y[16 * i + 11] = sum12;
2055:     y[16 * i + 12] = sum13;
2056:     y[16 * i + 13] = sum14;
2057:     y[16 * i + 14] = sum15;
2058:     y[16 * i + 15] = sum16;
2059:   }

2061:   PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow);
2062:   VecRestoreArrayRead(xx, &x);
2063:   VecRestoreArray(yy, &y);
2064:   return 0;
2065: }

2067: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2068: {
2069:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2070:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2071:   const PetscScalar *x, *v;
2072:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2073:   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2074:   const PetscInt     m = b->AIJ->rmap->n, *idx;
2075:   PetscInt           n, i;

2077:   VecSet(yy, 0.0);
2078:   VecGetArrayRead(xx, &x);
2079:   VecGetArray(yy, &y);

2081:   for (i = 0; i < m; i++) {
2082:     idx     = a->j + a->i[i];
2083:     v       = a->a + a->i[i];
2084:     n       = a->i[i + 1] - a->i[i];
2085:     alpha1  = x[16 * i];
2086:     alpha2  = x[16 * i + 1];
2087:     alpha3  = x[16 * i + 2];
2088:     alpha4  = x[16 * i + 3];
2089:     alpha5  = x[16 * i + 4];
2090:     alpha6  = x[16 * i + 5];
2091:     alpha7  = x[16 * i + 6];
2092:     alpha8  = x[16 * i + 7];
2093:     alpha9  = x[16 * i + 8];
2094:     alpha10 = x[16 * i + 9];
2095:     alpha11 = x[16 * i + 10];
2096:     alpha12 = x[16 * i + 11];
2097:     alpha13 = x[16 * i + 12];
2098:     alpha14 = x[16 * i + 13];
2099:     alpha15 = x[16 * i + 14];
2100:     alpha16 = x[16 * i + 15];
2101:     while (n-- > 0) {
2102:       y[16 * (*idx)] += alpha1 * (*v);
2103:       y[16 * (*idx) + 1] += alpha2 * (*v);
2104:       y[16 * (*idx) + 2] += alpha3 * (*v);
2105:       y[16 * (*idx) + 3] += alpha4 * (*v);
2106:       y[16 * (*idx) + 4] += alpha5 * (*v);
2107:       y[16 * (*idx) + 5] += alpha6 * (*v);
2108:       y[16 * (*idx) + 6] += alpha7 * (*v);
2109:       y[16 * (*idx) + 7] += alpha8 * (*v);
2110:       y[16 * (*idx) + 8] += alpha9 * (*v);
2111:       y[16 * (*idx) + 9] += alpha10 * (*v);
2112:       y[16 * (*idx) + 10] += alpha11 * (*v);
2113:       y[16 * (*idx) + 11] += alpha12 * (*v);
2114:       y[16 * (*idx) + 12] += alpha13 * (*v);
2115:       y[16 * (*idx) + 13] += alpha14 * (*v);
2116:       y[16 * (*idx) + 14] += alpha15 * (*v);
2117:       y[16 * (*idx) + 15] += alpha16 * (*v);
2118:       idx++;
2119:       v++;
2120:     }
2121:   }
2122:   PetscLogFlops(32.0 * a->nz);
2123:   VecRestoreArrayRead(xx, &x);
2124:   VecRestoreArray(yy, &y);
2125:   return 0;
2126: }

2128: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2129: {
2130:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2131:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2132:   const PetscScalar *x, *v;
2133:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2134:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2135:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2136:   PetscInt           n, i, jrow, j;

2138:   if (yy != zz) VecCopy(yy, zz);
2139:   VecGetArrayRead(xx, &x);
2140:   VecGetArray(zz, &y);
2141:   idx = a->j;
2142:   v   = a->a;
2143:   ii  = a->i;

2145:   for (i = 0; i < m; i++) {
2146:     jrow  = ii[i];
2147:     n     = ii[i + 1] - jrow;
2148:     sum1  = 0.0;
2149:     sum2  = 0.0;
2150:     sum3  = 0.0;
2151:     sum4  = 0.0;
2152:     sum5  = 0.0;
2153:     sum6  = 0.0;
2154:     sum7  = 0.0;
2155:     sum8  = 0.0;
2156:     sum9  = 0.0;
2157:     sum10 = 0.0;
2158:     sum11 = 0.0;
2159:     sum12 = 0.0;
2160:     sum13 = 0.0;
2161:     sum14 = 0.0;
2162:     sum15 = 0.0;
2163:     sum16 = 0.0;
2164:     for (j = 0; j < n; j++) {
2165:       sum1 += v[jrow] * x[16 * idx[jrow]];
2166:       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2167:       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2168:       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2169:       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2170:       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2171:       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2172:       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2173:       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2174:       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2175:       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2176:       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2177:       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2178:       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2179:       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2180:       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2181:       jrow++;
2182:     }
2183:     y[16 * i] += sum1;
2184:     y[16 * i + 1] += sum2;
2185:     y[16 * i + 2] += sum3;
2186:     y[16 * i + 3] += sum4;
2187:     y[16 * i + 4] += sum5;
2188:     y[16 * i + 5] += sum6;
2189:     y[16 * i + 6] += sum7;
2190:     y[16 * i + 7] += sum8;
2191:     y[16 * i + 8] += sum9;
2192:     y[16 * i + 9] += sum10;
2193:     y[16 * i + 10] += sum11;
2194:     y[16 * i + 11] += sum12;
2195:     y[16 * i + 12] += sum13;
2196:     y[16 * i + 13] += sum14;
2197:     y[16 * i + 14] += sum15;
2198:     y[16 * i + 15] += sum16;
2199:   }

2201:   PetscLogFlops(32.0 * a->nz);
2202:   VecRestoreArrayRead(xx, &x);
2203:   VecRestoreArray(zz, &y);
2204:   return 0;
2205: }

2207: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2208: {
2209:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2210:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2211:   const PetscScalar *x, *v;
2212:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2213:   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2214:   const PetscInt     m = b->AIJ->rmap->n, *idx;
2215:   PetscInt           n, i;

2217:   if (yy != zz) VecCopy(yy, zz);
2218:   VecGetArrayRead(xx, &x);
2219:   VecGetArray(zz, &y);
2220:   for (i = 0; i < m; i++) {
2221:     idx     = a->j + a->i[i];
2222:     v       = a->a + a->i[i];
2223:     n       = a->i[i + 1] - a->i[i];
2224:     alpha1  = x[16 * i];
2225:     alpha2  = x[16 * i + 1];
2226:     alpha3  = x[16 * i + 2];
2227:     alpha4  = x[16 * i + 3];
2228:     alpha5  = x[16 * i + 4];
2229:     alpha6  = x[16 * i + 5];
2230:     alpha7  = x[16 * i + 6];
2231:     alpha8  = x[16 * i + 7];
2232:     alpha9  = x[16 * i + 8];
2233:     alpha10 = x[16 * i + 9];
2234:     alpha11 = x[16 * i + 10];
2235:     alpha12 = x[16 * i + 11];
2236:     alpha13 = x[16 * i + 12];
2237:     alpha14 = x[16 * i + 13];
2238:     alpha15 = x[16 * i + 14];
2239:     alpha16 = x[16 * i + 15];
2240:     while (n-- > 0) {
2241:       y[16 * (*idx)] += alpha1 * (*v);
2242:       y[16 * (*idx) + 1] += alpha2 * (*v);
2243:       y[16 * (*idx) + 2] += alpha3 * (*v);
2244:       y[16 * (*idx) + 3] += alpha4 * (*v);
2245:       y[16 * (*idx) + 4] += alpha5 * (*v);
2246:       y[16 * (*idx) + 5] += alpha6 * (*v);
2247:       y[16 * (*idx) + 6] += alpha7 * (*v);
2248:       y[16 * (*idx) + 7] += alpha8 * (*v);
2249:       y[16 * (*idx) + 8] += alpha9 * (*v);
2250:       y[16 * (*idx) + 9] += alpha10 * (*v);
2251:       y[16 * (*idx) + 10] += alpha11 * (*v);
2252:       y[16 * (*idx) + 11] += alpha12 * (*v);
2253:       y[16 * (*idx) + 12] += alpha13 * (*v);
2254:       y[16 * (*idx) + 13] += alpha14 * (*v);
2255:       y[16 * (*idx) + 14] += alpha15 * (*v);
2256:       y[16 * (*idx) + 15] += alpha16 * (*v);
2257:       idx++;
2258:       v++;
2259:     }
2260:   }
2261:   PetscLogFlops(32.0 * a->nz);
2262:   VecRestoreArrayRead(xx, &x);
2263:   VecRestoreArray(zz, &y);
2264:   return 0;
2265: }

2267: /*--------------------------------------------------------------------------------------------*/
2268: PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2269: {
2270:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2271:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2272:   const PetscScalar *x, *v;
2273:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2274:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2275:   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2276:   PetscInt           nonzerorow = 0, n, i, jrow, j;

2278:   VecGetArrayRead(xx, &x);
2279:   VecGetArray(yy, &y);
2280:   idx = a->j;
2281:   v   = a->a;
2282:   ii  = a->i;

2284:   for (i = 0; i < m; i++) {
2285:     jrow  = ii[i];
2286:     n     = ii[i + 1] - jrow;
2287:     sum1  = 0.0;
2288:     sum2  = 0.0;
2289:     sum3  = 0.0;
2290:     sum4  = 0.0;
2291:     sum5  = 0.0;
2292:     sum6  = 0.0;
2293:     sum7  = 0.0;
2294:     sum8  = 0.0;
2295:     sum9  = 0.0;
2296:     sum10 = 0.0;
2297:     sum11 = 0.0;
2298:     sum12 = 0.0;
2299:     sum13 = 0.0;
2300:     sum14 = 0.0;
2301:     sum15 = 0.0;
2302:     sum16 = 0.0;
2303:     sum17 = 0.0;
2304:     sum18 = 0.0;

2306:     nonzerorow += (n > 0);
2307:     for (j = 0; j < n; j++) {
2308:       sum1 += v[jrow] * x[18 * idx[jrow]];
2309:       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2310:       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2311:       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2312:       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2313:       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2314:       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2315:       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2316:       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2317:       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2318:       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2319:       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2320:       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2321:       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2322:       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2323:       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2324:       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2325:       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2326:       jrow++;
2327:     }
2328:     y[18 * i]      = sum1;
2329:     y[18 * i + 1]  = sum2;
2330:     y[18 * i + 2]  = sum3;
2331:     y[18 * i + 3]  = sum4;
2332:     y[18 * i + 4]  = sum5;
2333:     y[18 * i + 5]  = sum6;
2334:     y[18 * i + 6]  = sum7;
2335:     y[18 * i + 7]  = sum8;
2336:     y[18 * i + 8]  = sum9;
2337:     y[18 * i + 9]  = sum10;
2338:     y[18 * i + 10] = sum11;
2339:     y[18 * i + 11] = sum12;
2340:     y[18 * i + 12] = sum13;
2341:     y[18 * i + 13] = sum14;
2342:     y[18 * i + 14] = sum15;
2343:     y[18 * i + 15] = sum16;
2344:     y[18 * i + 16] = sum17;
2345:     y[18 * i + 17] = sum18;
2346:   }

2348:   PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow);
2349:   VecRestoreArrayRead(xx, &x);
2350:   VecRestoreArray(yy, &y);
2351:   return 0;
2352: }

2354: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2355: {
2356:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2357:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2358:   const PetscScalar *x, *v;
2359:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2360:   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2361:   const PetscInt     m = b->AIJ->rmap->n, *idx;
2362:   PetscInt           n, i;

2364:   VecSet(yy, 0.0);
2365:   VecGetArrayRead(xx, &x);
2366:   VecGetArray(yy, &y);

2368:   for (i = 0; i < m; i++) {
2369:     idx     = a->j + a->i[i];
2370:     v       = a->a + a->i[i];
2371:     n       = a->i[i + 1] - a->i[i];
2372:     alpha1  = x[18 * i];
2373:     alpha2  = x[18 * i + 1];
2374:     alpha3  = x[18 * i + 2];
2375:     alpha4  = x[18 * i + 3];
2376:     alpha5  = x[18 * i + 4];
2377:     alpha6  = x[18 * i + 5];
2378:     alpha7  = x[18 * i + 6];
2379:     alpha8  = x[18 * i + 7];
2380:     alpha9  = x[18 * i + 8];
2381:     alpha10 = x[18 * i + 9];
2382:     alpha11 = x[18 * i + 10];
2383:     alpha12 = x[18 * i + 11];
2384:     alpha13 = x[18 * i + 12];
2385:     alpha14 = x[18 * i + 13];
2386:     alpha15 = x[18 * i + 14];
2387:     alpha16 = x[18 * i + 15];
2388:     alpha17 = x[18 * i + 16];
2389:     alpha18 = x[18 * i + 17];
2390:     while (n-- > 0) {
2391:       y[18 * (*idx)] += alpha1 * (*v);
2392:       y[18 * (*idx) + 1] += alpha2 * (*v);
2393:       y[18 * (*idx) + 2] += alpha3 * (*v);
2394:       y[18 * (*idx) + 3] += alpha4 * (*v);
2395:       y[18 * (*idx) + 4] += alpha5 * (*v);
2396:       y[18 * (*idx) + 5] += alpha6 * (*v);
2397:       y[18 * (*idx) + 6] += alpha7 * (*v);
2398:       y[18 * (*idx) + 7] += alpha8 * (*v);
2399:       y[18 * (*idx) + 8] += alpha9 * (*v);
2400:       y[18 * (*idx) + 9] += alpha10 * (*v);
2401:       y[18 * (*idx) + 10] += alpha11 * (*v);
2402:       y[18 * (*idx) + 11] += alpha12 * (*v);
2403:       y[18 * (*idx) + 12] += alpha13 * (*v);
2404:       y[18 * (*idx) + 13] += alpha14 * (*v);
2405:       y[18 * (*idx) + 14] += alpha15 * (*v);
2406:       y[18 * (*idx) + 15] += alpha16 * (*v);
2407:       y[18 * (*idx) + 16] += alpha17 * (*v);
2408:       y[18 * (*idx) + 17] += alpha18 * (*v);
2409:       idx++;
2410:       v++;
2411:     }
2412:   }
2413:   PetscLogFlops(36.0 * a->nz);
2414:   VecRestoreArrayRead(xx, &x);
2415:   VecRestoreArray(yy, &y);
2416:   return 0;
2417: }

2419: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2420: {
2421:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2422:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2423:   const PetscScalar *x, *v;
2424:   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2425:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2426:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2427:   PetscInt           n, i, jrow, j;

2429:   if (yy != zz) VecCopy(yy, zz);
2430:   VecGetArrayRead(xx, &x);
2431:   VecGetArray(zz, &y);
2432:   idx = a->j;
2433:   v   = a->a;
2434:   ii  = a->i;

2436:   for (i = 0; i < m; i++) {
2437:     jrow  = ii[i];
2438:     n     = ii[i + 1] - jrow;
2439:     sum1  = 0.0;
2440:     sum2  = 0.0;
2441:     sum3  = 0.0;
2442:     sum4  = 0.0;
2443:     sum5  = 0.0;
2444:     sum6  = 0.0;
2445:     sum7  = 0.0;
2446:     sum8  = 0.0;
2447:     sum9  = 0.0;
2448:     sum10 = 0.0;
2449:     sum11 = 0.0;
2450:     sum12 = 0.0;
2451:     sum13 = 0.0;
2452:     sum14 = 0.0;
2453:     sum15 = 0.0;
2454:     sum16 = 0.0;
2455:     sum17 = 0.0;
2456:     sum18 = 0.0;
2457:     for (j = 0; j < n; j++) {
2458:       sum1 += v[jrow] * x[18 * idx[jrow]];
2459:       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2460:       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2461:       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2462:       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2463:       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2464:       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2465:       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2466:       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2467:       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2468:       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2469:       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2470:       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2471:       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2472:       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2473:       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2474:       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2475:       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2476:       jrow++;
2477:     }
2478:     y[18 * i] += sum1;
2479:     y[18 * i + 1] += sum2;
2480:     y[18 * i + 2] += sum3;
2481:     y[18 * i + 3] += sum4;
2482:     y[18 * i + 4] += sum5;
2483:     y[18 * i + 5] += sum6;
2484:     y[18 * i + 6] += sum7;
2485:     y[18 * i + 7] += sum8;
2486:     y[18 * i + 8] += sum9;
2487:     y[18 * i + 9] += sum10;
2488:     y[18 * i + 10] += sum11;
2489:     y[18 * i + 11] += sum12;
2490:     y[18 * i + 12] += sum13;
2491:     y[18 * i + 13] += sum14;
2492:     y[18 * i + 14] += sum15;
2493:     y[18 * i + 15] += sum16;
2494:     y[18 * i + 16] += sum17;
2495:     y[18 * i + 17] += sum18;
2496:   }

2498:   PetscLogFlops(36.0 * a->nz);
2499:   VecRestoreArrayRead(xx, &x);
2500:   VecRestoreArray(zz, &y);
2501:   return 0;
2502: }

2504: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2505: {
2506:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2507:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2508:   const PetscScalar *x, *v;
2509:   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2510:   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2511:   const PetscInt     m = b->AIJ->rmap->n, *idx;
2512:   PetscInt           n, i;

2514:   if (yy != zz) VecCopy(yy, zz);
2515:   VecGetArrayRead(xx, &x);
2516:   VecGetArray(zz, &y);
2517:   for (i = 0; i < m; i++) {
2518:     idx     = a->j + a->i[i];
2519:     v       = a->a + a->i[i];
2520:     n       = a->i[i + 1] - a->i[i];
2521:     alpha1  = x[18 * i];
2522:     alpha2  = x[18 * i + 1];
2523:     alpha3  = x[18 * i + 2];
2524:     alpha4  = x[18 * i + 3];
2525:     alpha5  = x[18 * i + 4];
2526:     alpha6  = x[18 * i + 5];
2527:     alpha7  = x[18 * i + 6];
2528:     alpha8  = x[18 * i + 7];
2529:     alpha9  = x[18 * i + 8];
2530:     alpha10 = x[18 * i + 9];
2531:     alpha11 = x[18 * i + 10];
2532:     alpha12 = x[18 * i + 11];
2533:     alpha13 = x[18 * i + 12];
2534:     alpha14 = x[18 * i + 13];
2535:     alpha15 = x[18 * i + 14];
2536:     alpha16 = x[18 * i + 15];
2537:     alpha17 = x[18 * i + 16];
2538:     alpha18 = x[18 * i + 17];
2539:     while (n-- > 0) {
2540:       y[18 * (*idx)] += alpha1 * (*v);
2541:       y[18 * (*idx) + 1] += alpha2 * (*v);
2542:       y[18 * (*idx) + 2] += alpha3 * (*v);
2543:       y[18 * (*idx) + 3] += alpha4 * (*v);
2544:       y[18 * (*idx) + 4] += alpha5 * (*v);
2545:       y[18 * (*idx) + 5] += alpha6 * (*v);
2546:       y[18 * (*idx) + 6] += alpha7 * (*v);
2547:       y[18 * (*idx) + 7] += alpha8 * (*v);
2548:       y[18 * (*idx) + 8] += alpha9 * (*v);
2549:       y[18 * (*idx) + 9] += alpha10 * (*v);
2550:       y[18 * (*idx) + 10] += alpha11 * (*v);
2551:       y[18 * (*idx) + 11] += alpha12 * (*v);
2552:       y[18 * (*idx) + 12] += alpha13 * (*v);
2553:       y[18 * (*idx) + 13] += alpha14 * (*v);
2554:       y[18 * (*idx) + 14] += alpha15 * (*v);
2555:       y[18 * (*idx) + 15] += alpha16 * (*v);
2556:       y[18 * (*idx) + 16] += alpha17 * (*v);
2557:       y[18 * (*idx) + 17] += alpha18 * (*v);
2558:       idx++;
2559:       v++;
2560:     }
2561:   }
2562:   PetscLogFlops(36.0 * a->nz);
2563:   VecRestoreArrayRead(xx, &x);
2564:   VecRestoreArray(zz, &y);
2565:   return 0;
2566: }

2568: PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2569: {
2570:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2571:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2572:   const PetscScalar *x, *v;
2573:   PetscScalar       *y, *sums;
2574:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2575:   PetscInt           n, i, jrow, j, dof = b->dof, k;

2577:   VecGetArrayRead(xx, &x);
2578:   VecSet(yy, 0.0);
2579:   VecGetArray(yy, &y);
2580:   idx = a->j;
2581:   v   = a->a;
2582:   ii  = a->i;

2584:   for (i = 0; i < m; i++) {
2585:     jrow = ii[i];
2586:     n    = ii[i + 1] - jrow;
2587:     sums = y + dof * i;
2588:     for (j = 0; j < n; j++) {
2589:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2590:       jrow++;
2591:     }
2592:   }

2594:   PetscLogFlops(2.0 * dof * a->nz);
2595:   VecRestoreArrayRead(xx, &x);
2596:   VecRestoreArray(yy, &y);
2597:   return 0;
2598: }

2600: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2601: {
2602:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2603:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2604:   const PetscScalar *x, *v;
2605:   PetscScalar       *y, *sums;
2606:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2607:   PetscInt           n, i, jrow, j, dof = b->dof, k;

2609:   if (yy != zz) VecCopy(yy, zz);
2610:   VecGetArrayRead(xx, &x);
2611:   VecGetArray(zz, &y);
2612:   idx = a->j;
2613:   v   = a->a;
2614:   ii  = a->i;

2616:   for (i = 0; i < m; i++) {
2617:     jrow = ii[i];
2618:     n    = ii[i + 1] - jrow;
2619:     sums = y + dof * i;
2620:     for (j = 0; j < n; j++) {
2621:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2622:       jrow++;
2623:     }
2624:   }

2626:   PetscLogFlops(2.0 * dof * a->nz);
2627:   VecRestoreArrayRead(xx, &x);
2628:   VecRestoreArray(zz, &y);
2629:   return 0;
2630: }

2632: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2633: {
2634:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2635:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2636:   const PetscScalar *x, *v, *alpha;
2637:   PetscScalar       *y;
2638:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2639:   PetscInt           n, i, k;

2641:   VecGetArrayRead(xx, &x);
2642:   VecSet(yy, 0.0);
2643:   VecGetArray(yy, &y);
2644:   for (i = 0; i < m; i++) {
2645:     idx   = a->j + a->i[i];
2646:     v     = a->a + a->i[i];
2647:     n     = a->i[i + 1] - a->i[i];
2648:     alpha = x + dof * i;
2649:     while (n-- > 0) {
2650:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2651:       idx++;
2652:       v++;
2653:     }
2654:   }
2655:   PetscLogFlops(2.0 * dof * a->nz);
2656:   VecRestoreArrayRead(xx, &x);
2657:   VecRestoreArray(yy, &y);
2658:   return 0;
2659: }

2661: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2662: {
2663:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2664:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2665:   const PetscScalar *x, *v, *alpha;
2666:   PetscScalar       *y;
2667:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2668:   PetscInt           n, i, k;

2670:   if (yy != zz) VecCopy(yy, zz);
2671:   VecGetArrayRead(xx, &x);
2672:   VecGetArray(zz, &y);
2673:   for (i = 0; i < m; i++) {
2674:     idx   = a->j + a->i[i];
2675:     v     = a->a + a->i[i];
2676:     n     = a->i[i + 1] - a->i[i];
2677:     alpha = x + dof * i;
2678:     while (n-- > 0) {
2679:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2680:       idx++;
2681:       v++;
2682:     }
2683:   }
2684:   PetscLogFlops(2.0 * dof * a->nz);
2685:   VecRestoreArrayRead(xx, &x);
2686:   VecRestoreArray(zz, &y);
2687:   return 0;
2688: }

2690: /*===================================================================================*/
2691: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2692: {
2693:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

2695:   /* start the scatter */
2696:   VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2697:   (*b->AIJ->ops->mult)(b->AIJ, xx, yy);
2698:   VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2699:   (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy);
2700:   return 0;
2701: }

2703: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2704: {
2705:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

2707:   (*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w);
2708:   (*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy);
2709:   VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE);
2710:   VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE);
2711:   return 0;
2712: }

2714: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2715: {
2716:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

2718:   /* start the scatter */
2719:   VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2720:   (*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz);
2721:   VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2722:   (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz);
2723:   return 0;
2724: }

2726: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2727: {
2728:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

2730:   (*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w);
2731:   (*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz);
2732:   VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE);
2733:   VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE);
2734:   return 0;
2735: }

2737: /* ----------------------------------------------------------------*/
2738: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2739: {
2740:   Mat_Product *product = C->product;

2742:   if (product->type == MATPRODUCT_PtAP) {
2743:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2744:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
2745:   return 0;
2746: }

2748: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2749: {
2750:   Mat_Product *product = C->product;
2751:   PetscBool    flg     = PETSC_FALSE;
2752:   Mat          A = product->A, P = product->B;
2753:   PetscInt     alg = 1; /* set default algorithm */
2754: #if !defined(PETSC_HAVE_HYPRE)
2755:   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
2756:   PetscInt    nalg        = 4;
2757: #else
2758:   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
2759:   PetscInt    nalg        = 5;
2760: #endif


2764:   /* PtAP */
2765:   /* Check matrix local sizes */
2767:              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
2769:              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

2771:   /* Set the default algorithm */
2772:   PetscStrcmp(C->product->alg, "default", &flg);
2773:   if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);

2775:   /* Get runtime option */
2776:   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2777:   PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg);
2778:   if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2779:   PetscOptionsEnd();

2781:   PetscStrcmp(C->product->alg, "allatonce", &flg);
2782:   if (flg) {
2783:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2784:     return 0;
2785:   }

2787:   PetscStrcmp(C->product->alg, "allatonce_merged", &flg);
2788:   if (flg) {
2789:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2790:     return 0;
2791:   }

2793:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2794:   PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2795:   MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P);
2796:   MatProductSetFromOptions(C);
2797:   return 0;
2798: }

2800: /* ----------------------------------------------------------------*/
2801: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
2802: {
2803:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2804:   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
2805:   Mat                P  = pp->AIJ;
2806:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2807:   PetscInt          *pti, *ptj, *ptJ;
2808:   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2809:   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2810:   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
2811:   MatScalar         *ca;
2812:   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

2814:   /* Get ij structure of P^T */
2815:   MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj);

2817:   cn = pn * ppdof;
2818:   /* Allocate ci array, arrays for fill computation and */
2819:   /* free space for accumulating nonzero column info */
2820:   PetscMalloc1(cn + 1, &ci);
2821:   ci[0] = 0;

2823:   /* Work arrays for rows of P^T*A */
2824:   PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow);
2825:   PetscArrayzero(ptadenserow, an);
2826:   PetscArrayzero(denserow, cn);

2828:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2829:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2830:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2831:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space);
2832:   current_space = free_space;

2834:   /* Determine symbolic info for each row of C: */
2835:   for (i = 0; i < pn; i++) {
2836:     ptnzi = pti[i + 1] - pti[i];
2837:     ptJ   = ptj + pti[i];
2838:     for (dof = 0; dof < ppdof; dof++) {
2839:       ptanzi = 0;
2840:       /* Determine symbolic row of PtA: */
2841:       for (j = 0; j < ptnzi; j++) {
2842:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2843:         arow = ptJ[j] * ppdof + dof;
2844:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2845:         anzj = ai[arow + 1] - ai[arow];
2846:         ajj  = aj + ai[arow];
2847:         for (k = 0; k < anzj; k++) {
2848:           if (!ptadenserow[ajj[k]]) {
2849:             ptadenserow[ajj[k]]    = -1;
2850:             ptasparserow[ptanzi++] = ajj[k];
2851:           }
2852:         }
2853:       }
2854:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2855:       ptaj = ptasparserow;
2856:       cnzi = 0;
2857:       for (j = 0; j < ptanzi; j++) {
2858:         /* Get offset within block of P */
2859:         pshift = *ptaj % ppdof;
2860:         /* Get block row of P */
2861:         prow = (*ptaj++) / ppdof; /* integer division */
2862:         /* P has same number of nonzeros per row as the compressed form */
2863:         pnzj = pi[prow + 1] - pi[prow];
2864:         pjj  = pj + pi[prow];
2865:         for (k = 0; k < pnzj; k++) {
2866:           /* Locations in C are shifted by the offset within the block */
2867:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2868:           if (!denserow[pjj[k] * ppdof + pshift]) {
2869:             denserow[pjj[k] * ppdof + pshift] = -1;
2870:             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
2871:           }
2872:         }
2873:       }

2875:       /* sort sparserow */
2876:       PetscSortInt(cnzi, sparserow);

2878:       /* If free space is not available, make more free space */
2879:       /* Double the amount of total space in the list */
2880:       if (current_space->local_remaining < cnzi) PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space);

2882:       /* Copy data into free space, and zero out denserows */
2883:       PetscArraycpy(current_space->array, sparserow, cnzi);

2885:       current_space->array += cnzi;
2886:       current_space->local_used += cnzi;
2887:       current_space->local_remaining -= cnzi;

2889:       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2890:       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

2892:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2893:       /*        For now, we will recompute what is needed. */
2894:       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
2895:     }
2896:   }
2897:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2898:   /* Allocate space for cj, initialize cj, and */
2899:   /* destroy list of free space and other temporary array(s) */
2900:   PetscMalloc1(ci[cn] + 1, &cj);
2901:   PetscFreeSpaceContiguous(&free_space, cj);
2902:   PetscFree4(ptadenserow, ptasparserow, denserow, sparserow);

2904:   /* Allocate space for ca */
2905:   PetscCalloc1(ci[cn] + 1, &ca);

2907:   /* put together the new matrix */
2908:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C);
2909:   MatSetBlockSize(C, pp->dof);

2911:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2912:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2913:   c          = (Mat_SeqAIJ *)(C->data);
2914:   c->free_a  = PETSC_TRUE;
2915:   c->free_ij = PETSC_TRUE;
2916:   c->nonew   = 0;

2918:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2919:   C->ops->productnumeric = MatProductNumeric_PtAP;

2921:   /* Clean up. */
2922:   MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj);
2923:   return 0;
2924: }

2926: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
2927: {
2928:   /* This routine requires testing -- first draft only */
2929:   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
2930:   Mat              P  = pp->AIJ;
2931:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2932:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
2933:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2934:   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2935:   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2936:   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2937:   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2938:   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2939:   MatScalar       *ca = c->a, *caj, *apa;

2941:   /* Allocate temporary array for storage of one row of A*P */
2942:   PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense);

2944:   /* Clear old values in C */
2945:   PetscArrayzero(ca, ci[cm]);

2947:   for (i = 0; i < am; i++) {
2948:     /* Form sparse row of A*P */
2949:     anzi  = ai[i + 1] - ai[i];
2950:     apnzj = 0;
2951:     for (j = 0; j < anzi; j++) {
2952:       /* Get offset within block of P */
2953:       pshift = *aj % ppdof;
2954:       /* Get block row of P */
2955:       prow = *aj++ / ppdof; /* integer division */
2956:       pnzj = pi[prow + 1] - pi[prow];
2957:       pjj  = pj + pi[prow];
2958:       paj  = pa + pi[prow];
2959:       for (k = 0; k < pnzj; k++) {
2960:         poffset = pjj[k] * ppdof + pshift;
2961:         if (!apjdense[poffset]) {
2962:           apjdense[poffset] = -1;
2963:           apj[apnzj++]      = poffset;
2964:         }
2965:         apa[poffset] += (*aa) * paj[k];
2966:       }
2967:       PetscLogFlops(2.0 * pnzj);
2968:       aa++;
2969:     }

2971:     /* Sort the j index array for quick sparse axpy. */
2972:     /* Note: a array does not need sorting as it is in dense storage locations. */
2973:     PetscSortInt(apnzj, apj);

2975:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2976:     prow    = i / ppdof; /* integer division */
2977:     pshift  = i % ppdof;
2978:     poffset = pi[prow];
2979:     pnzi    = pi[prow + 1] - poffset;
2980:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2981:     pJ = pj + poffset;
2982:     pA = pa + poffset;
2983:     for (j = 0; j < pnzi; j++) {
2984:       crow = (*pJ) * ppdof + pshift;
2985:       cjj  = cj + ci[crow];
2986:       caj  = ca + ci[crow];
2987:       pJ++;
2988:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2989:       for (k = 0, nextap = 0; nextap < apnzj; k++) {
2990:         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
2991:       }
2992:       PetscLogFlops(2.0 * apnzj);
2993:       pA++;
2994:     }

2996:     /* Zero the current row info for A*P */
2997:     for (j = 0; j < apnzj; j++) {
2998:       apa[apj[j]]      = 0.;
2999:       apjdense[apj[j]] = 0;
3000:     }
3001:   }

3003:   /* Assemble the final matrix and clean up */
3004:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
3005:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
3006:   PetscFree3(apa, apj, apjdense);
3007:   return 0;
3008: }

3010: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3011: {
3012:   Mat_Product *product = C->product;
3013:   Mat          A = product->A, P = product->B;

3015:   MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C);
3016:   return 0;
3017: }

3019: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C)
3020: {
3021:   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3022: }

3024: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C)
3025: {
3026:   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3027: }

3029: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

3031: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
3032: {
3033:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;


3036:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C);
3037:   return 0;
3038: }

3040: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

3042: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
3043: {
3044:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

3046:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C);
3047:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3048:   return 0;
3049: }

3051: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

3053: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
3054: {
3055:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;


3058:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C);
3059:   return 0;
3060: }

3062: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

3064: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
3065: {
3066:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;


3069:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C);
3070:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3071:   return 0;
3072: }

3074: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3075: {
3076:   Mat_Product *product = C->product;
3077:   Mat          A = product->A, P = product->B;
3078:   PetscBool    flg;

3080:   PetscStrcmp(product->alg, "allatonce", &flg);
3081:   if (flg) {
3082:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C);
3083:     C->ops->productnumeric = MatProductNumeric_PtAP;
3084:     return 0;
3085:   }

3087:   PetscStrcmp(product->alg, "allatonce_merged", &flg);
3088:   if (flg) {
3089:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C);
3090:     C->ops->productnumeric = MatProductNumeric_PtAP;
3091:     return 0;
3092:   }

3094:   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3095: }

3097: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3098: {
3099:   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3100:   Mat          a   = b->AIJ, B;
3101:   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
3102:   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3103:   PetscInt    *cols;
3104:   PetscScalar *vals;

3106:   MatGetSize(a, &m, &n);
3107:   PetscMalloc1(dof * m, &ilen);
3108:   for (i = 0; i < m; i++) {
3109:     nmax = PetscMax(nmax, aij->ilen[i]);
3110:     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
3111:   }
3112:   MatCreate(PETSC_COMM_SELF, &B);
3113:   MatSetSizes(B, dof * m, dof * n, dof * m, dof * n);
3114:   MatSetType(B, newtype);
3115:   MatSeqAIJSetPreallocation(B, 0, ilen);
3116:   PetscFree(ilen);
3117:   PetscMalloc1(nmax, &icols);
3118:   ii = 0;
3119:   for (i = 0; i < m; i++) {
3120:     MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals);
3121:     for (j = 0; j < dof; j++) {
3122:       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
3123:       MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES);
3124:       ii++;
3125:     }
3126:     MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals);
3127:   }
3128:   PetscFree(icols);
3129:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
3130:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

3132:   if (reuse == MAT_INPLACE_MATRIX) {
3133:     MatHeaderReplace(A, &B);
3134:   } else {
3135:     *newmat = B;
3136:   }
3137:   return 0;
3138: }

3140: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3142: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3143: {
3144:   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3145:   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
3146:   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
3147:   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
3148:   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
3149:   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
3150:   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
3151:   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
3152:   PetscInt     rstart, cstart, *garray, ii, k;
3153:   PetscScalar *vals, *ovals;

3155:   PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz);
3156:   for (i = 0; i < A->rmap->n / dof; i++) {
3157:     nmax  = PetscMax(nmax, AIJ->ilen[i]);
3158:     onmax = PetscMax(onmax, OAIJ->ilen[i]);
3159:     for (j = 0; j < dof; j++) {
3160:       dnz[dof * i + j] = AIJ->ilen[i];
3161:       onz[dof * i + j] = OAIJ->ilen[i];
3162:     }
3163:   }
3164:   MatCreate(PetscObjectComm((PetscObject)A), &B);
3165:   MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3166:   MatSetType(B, newtype);
3167:   MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz);
3168:   MatSetBlockSize(B, dof);
3169:   PetscFree2(dnz, onz);

3171:   PetscMalloc2(nmax, &icols, onmax, &oicols);
3172:   rstart = dof * maij->A->rmap->rstart;
3173:   cstart = dof * maij->A->cmap->rstart;
3174:   garray = mpiaij->garray;

3176:   ii = rstart;
3177:   for (i = 0; i < A->rmap->n / dof; i++) {
3178:     MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals);
3179:     MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals);
3180:     for (j = 0; j < dof; j++) {
3181:       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3182:       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
3183:       MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES);
3184:       MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES);
3185:       ii++;
3186:     }
3187:     MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals);
3188:     MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals);
3189:   }
3190:   PetscFree2(icols, oicols);

3192:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
3193:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

3195:   if (reuse == MAT_INPLACE_MATRIX) {
3196:     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3197:     ((PetscObject)A)->refct = 1;

3199:     MatHeaderReplace(A, &B);

3201:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3202:   } else {
3203:     *newmat = B;
3204:   }
3205:   return 0;
3206: }

3208: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
3209: {
3210:   Mat A;

3212:   MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
3213:   MatCreateSubMatrix(A, isrow, iscol, cll, newmat);
3214:   MatDestroy(&A);
3215:   return 0;
3216: }

3218: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
3219: {
3220:   Mat A;

3222:   MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
3223:   MatCreateSubMatrices(A, n, irow, icol, scall, submat);
3224:   MatDestroy(&A);
3225:   return 0;
3226: }

3228: /* ---------------------------------------------------------------------------------- */
3229: /*@
3230:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3231:   operations for multicomponent problems.  It interpolates each component the same
3232:   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
3233:   and `MATMPIAIJ` for distributed matrices.

3235:   Collective

3237:   Input Parameters:
3238: + A - the `MATAIJ` matrix describing the action on blocks
3239: - dof - the block size (number of components per node)

3241:   Output Parameter:
3242: . maij - the new `MATMAIJ` matrix

3244:   Operations provided:
3245: .vb
3246:     MatMult()
3247:     MatMultTranspose()
3248:     MatMultAdd()
3249:     MatMultTransposeAdd()
3250:     MatView()
3251: .ve

3253:   Level: advanced

3255: .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3256: @*/
3257: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
3258: {
3259:   PetscInt  n;
3260:   Mat       B;
3261:   PetscBool flg;
3262: #if defined(PETSC_HAVE_CUDA)
3263:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3264:   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3265: #endif

3267:   dof = PetscAbs(dof);
3268:   PetscObjectReference((PetscObject)A);

3270:   if (dof == 1) *maij = A;
3271:   else {
3272:     MatCreate(PetscObjectComm((PetscObject)A), &B);
3273:     /* propagate vec type */
3274:     MatSetVecType(B, A->defaultvectype);
3275:     MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N);
3276:     PetscLayoutSetBlockSize(B->rmap, dof);
3277:     PetscLayoutSetBlockSize(B->cmap, dof);
3278:     PetscLayoutSetUp(B->rmap);
3279:     PetscLayoutSetUp(B->cmap);

3281:     B->assembled = PETSC_TRUE;

3283:     PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
3284:     if (flg) {
3285:       Mat_SeqMAIJ *b;

3287:       MatSetType(B, MATSEQMAIJ);

3289:       B->ops->setup   = NULL;
3290:       B->ops->destroy = MatDestroy_SeqMAIJ;
3291:       B->ops->view    = MatView_SeqMAIJ;

3293:       b      = (Mat_SeqMAIJ *)B->data;
3294:       b->dof = dof;
3295:       b->AIJ = A;

3297:       if (dof == 2) {
3298:         B->ops->mult             = MatMult_SeqMAIJ_2;
3299:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3300:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3301:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3302:       } else if (dof == 3) {
3303:         B->ops->mult             = MatMult_SeqMAIJ_3;
3304:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3305:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3306:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3307:       } else if (dof == 4) {
3308:         B->ops->mult             = MatMult_SeqMAIJ_4;
3309:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3310:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3311:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3312:       } else if (dof == 5) {
3313:         B->ops->mult             = MatMult_SeqMAIJ_5;
3314:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3315:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3316:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3317:       } else if (dof == 6) {
3318:         B->ops->mult             = MatMult_SeqMAIJ_6;
3319:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3320:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3321:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3322:       } else if (dof == 7) {
3323:         B->ops->mult             = MatMult_SeqMAIJ_7;
3324:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3325:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3326:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3327:       } else if (dof == 8) {
3328:         B->ops->mult             = MatMult_SeqMAIJ_8;
3329:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3330:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3331:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3332:       } else if (dof == 9) {
3333:         B->ops->mult             = MatMult_SeqMAIJ_9;
3334:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3335:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3336:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3337:       } else if (dof == 10) {
3338:         B->ops->mult             = MatMult_SeqMAIJ_10;
3339:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3340:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3341:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3342:       } else if (dof == 11) {
3343:         B->ops->mult             = MatMult_SeqMAIJ_11;
3344:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3345:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3346:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3347:       } else if (dof == 16) {
3348:         B->ops->mult             = MatMult_SeqMAIJ_16;
3349:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3350:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3351:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3352:       } else if (dof == 18) {
3353:         B->ops->mult             = MatMult_SeqMAIJ_18;
3354:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3355:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3356:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3357:       } else {
3358:         B->ops->mult             = MatMult_SeqMAIJ_N;
3359:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3360:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3361:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3362:       }
3363: #if defined(PETSC_HAVE_CUDA)
3364:       PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ);
3365: #endif
3366:       PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ);
3367:       PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3368:     } else {
3369:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3370:       Mat_MPIMAIJ *b;
3371:       IS           from, to;
3372:       Vec          gvec;

3374:       MatSetType(B, MATMPIMAIJ);

3376:       B->ops->setup   = NULL;
3377:       B->ops->destroy = MatDestroy_MPIMAIJ;
3378:       B->ops->view    = MatView_MPIMAIJ;

3380:       b      = (Mat_MPIMAIJ *)B->data;
3381:       b->dof = dof;
3382:       b->A   = A;

3384:       MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ);
3385:       MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ);

3387:       VecGetSize(mpiaij->lvec, &n);
3388:       VecCreate(PETSC_COMM_SELF, &b->w);
3389:       VecSetSizes(b->w, n * dof, n * dof);
3390:       VecSetBlockSize(b->w, dof);
3391:       VecSetType(b->w, VECSEQ);

3393:       /* create two temporary Index sets for build scatter gather */
3394:       ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from);
3395:       ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to);

3397:       /* create temporary global vector to generate scatter context */
3398:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec);

3400:       /* generate the scatter context */
3401:       VecScatterCreate(gvec, from, b->w, to, &b->ctx);

3403:       ISDestroy(&from);
3404:       ISDestroy(&to);
3405:       VecDestroy(&gvec);

3407:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3408:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3409:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3410:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3412: #if defined(PETSC_HAVE_CUDA)
3413:       PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ);
3414: #endif
3415:       PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ);
3416:       PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3417:     }
3418:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3419:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3420:     MatSetUp(B);
3421: #if defined(PETSC_HAVE_CUDA)
3422:     /* temporary until we have CUDA implementation of MAIJ */
3423:     {
3424:       PetscBool flg;
3425:       if (convert) {
3426:         PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "");
3427:         if (flg) MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B);
3428:       }
3429:     }
3430: #endif
3431:     *maij = B;
3432:   }
3433:   return 0;
3434: }