Actual source code: kaij.c


  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26: #include <../src/mat/impls/kaij/kaij.h>
 27: #include <../src/mat/utils/freespace.h>
 28: #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

 33:    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 35:    Input Parameter:
 36: .  A - the `MATKAIJ` matrix

 38:    Output Parameter:
 39: .  B - the `MATAIJ` matrix

 41:    Level: advanced

 43:    Note:
 44:    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

 46: .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
 47: @*/
 48: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
 49: {
 50:   PetscBool ismpikaij, isseqkaij;

 52:   PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
 53:   PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij);
 54:   if (ismpikaij) {
 55:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

 57:     *B = b->A;
 58:   } else if (isseqkaij) {
 59:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

 61:     *B = b->AIJ;
 62:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
 63:   return 0;
 64: }

 66: /*@C
 67:    MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix

 69:    Not Collective; the entire S is stored and returned independently on all processes.

 71:    Input Parameter:
 72: .  A - the `MATKAIJ` matrix

 74:    Output Parameters:
 75: +  m - the number of rows in S
 76: .  n - the number of columns in S
 77: -  S - the S matrix, in form of a scalar array in column-major format

 79:    Note:
 80:    All output parameters are optional (pass NULL if not desired)

 82:    Level: advanced

 84: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
 89:   if (m) *m = b->p;
 90:   if (n) *n = b->q;
 91:   if (S) *S = b->S;
 92:   return 0;
 93: }

 95: /*@C
 96:    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix

 98:    Not Collective; the entire S is stored and returned independently on all processes.

100:    Input Parameter:
101: .  A - the `MATKAIJ` matrix

103:    Output Parameters:
104: +  m - the number of rows in S
105: .  n - the number of columns in S
106: -  S - the S matrix, in form of a scalar array in column-major format

108:    Note:
109:    All output parameters are optional (pass NULL if not desired)

111:    Level: advanced

113: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
116: {
117:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
118:   if (m) *m = b->p;
119:   if (n) *n = b->q;
120:   if (S) *S = b->S;
121:   return 0;
122: }

124: /*@C
125:   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

127:   Not collective

129:   Input Parameter:
130: . A - the `MATKAIJ` matrix

132:   Output Parameter:
133: . S - location of pointer to array obtained with `MatKAIJGetS()`

135:   Note:
136:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137:   If NULL is passed, it will not attempt to zero the array pointer.

139:   Level: advanced

141: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142: @*/
143: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
144: {
145:   if (S) *S = NULL;
146:   PetscObjectStateIncrease((PetscObject)A);
147:   return 0;
148: }

150: /*@C
151:   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

153:   Not collective

155:   Input Parameter:
156: . A - the `MATKAIJ` matrix

158:   Output Parameter:
159: . S - location of pointer to array obtained with `MatKAIJGetS()`

161:   Note:
162:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
163:   If NULL is passed, it will not attempt to zero the array pointer.

165:   Level: advanced

167: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
170: {
171:   if (S) *S = NULL;
172:   return 0;
173: }

175: /*@C
176:    MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix

178:    Not Collective; the entire T is stored and returned independently on all processes

180:    Input Parameter:
181: .  A - the `MATKAIJ` matrix

183:    Output Parameters:
184: +  m - the number of rows in T
185: .  n - the number of columns in T
186: -  T - the T matrix, in form of a scalar array in column-major format

188:    Note:
189:    All output parameters are optional (pass NULL if not desired)

191:    Level: advanced

193: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
196: {
197:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
198:   if (m) *m = b->p;
199:   if (n) *n = b->q;
200:   if (T) *T = b->T;
201:   return 0;
202: }

204: /*@C
205:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix

207:    Not Collective; the entire T is stored and returned independently on all processes

209:    Input Parameter:
210: .  A - the `MATKAIJ` matrix

212:    Output Parameters:
213: +  m - the number of rows in T
214: .  n - the number of columns in T
215: -  T - the T matrix, in form of a scalar array in column-major format

217:    Note: All output parameters are optional (pass NULL if not desired)

219:    Level: advanced

221: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
222: @*/
223: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
224: {
225:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
226:   if (m) *m = b->p;
227:   if (n) *n = b->q;
228:   if (T) *T = b->T;
229:   return 0;
230: }

232: /*@C
233:   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

235:   Not collective

237:   Input Parameter:
238: . A - the `MATKAIJ` matrix

240:   Output Parameter:
241: . T - location of pointer to array obtained with `MatKAIJGetS()`

243:   Note:
244:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
245:   If NULL is passed, it will not attempt to zero the array pointer.

247:   Level: advanced

249: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
252: {
253:   if (T) *T = NULL;
254:   PetscObjectStateIncrease((PetscObject)A);
255:   return 0;
256: }

258: /*@C
259:   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

261:   Not collective

263:   Input Parameter:
264: . A - the `MATKAIJ` matrix

266:   Output Parameter:
267: . T - location of pointer to array obtained with `MatKAIJGetS()`

269:   Note:
270:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
271:   If NULL is passed, it will not attempt to zero the array pointer.

273:   Level: advanced

275: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
276: @*/
277: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
278: {
279:   if (T) *T = NULL;
280:   return 0;
281: }

283: /*@
284:    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

286:    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

288:    Input Parameters:
289: +  A - the `MATKAIJ` matrix
290: -  B - the `MATAIJ` matrix

292:    Notes:
293:    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

295:    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

297:    Level: advanced

299: .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
300: @*/
301: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
302: {
303:   PetscMPIInt size;
304:   PetscBool   flg;

306:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
307:   if (size == 1) {
308:     PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg);
310:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
311:     a->AIJ         = B;
312:   } else {
313:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
314:     a->A           = B;
315:   }
316:   PetscObjectReference((PetscObject)B);
317:   return 0;
318: }

320: /*@C
321:    MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix

323:    Logically Collective; the entire S is stored independently on all processes.

325:    Input Parameters:
326: +  A - the `MATKAIJ` matrix
327: .  p - the number of rows in S
328: .  q - the number of columns in S
329: -  S - the S matrix, in form of a scalar array in column-major format

331:    Notes:
332:    The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.

334:    The S matrix is copied, so the user can destroy this array.

336:    Level: advanced

338: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
339: @*/
340: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
341: {
342:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

344:   PetscFree(a->S);
345:   if (S) {
346:     PetscMalloc1(p * q, &a->S);
347:     PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar));
348:   } else a->S = NULL;

350:   a->p = p;
351:   a->q = q;
352:   return 0;
353: }

355: /*@C
356:    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

358:    Logically Collective.

360:    Input Parameter:
361: .  A - the `MATKAIJ` matrix

363:   Output Parameter:
364: .  identity - the Boolean value

366:    Level: Advanced

368: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
371: {
372:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
373:   PetscInt     i, j;

375:   if (a->p != a->q) {
376:     *identity = PETSC_FALSE;
377:     return 0;
378:   } else *identity = PETSC_TRUE;
379:   if (!a->isTI || a->S) {
380:     for (i = 0; i < a->p && *identity; i++) {
381:       for (j = 0; j < a->p && *identity; j++) {
382:         if (i != j) {
383:           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
384:           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385:         } else {
386:           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
387:           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388:         }
389:       }
390:     }
391:   }
392:   return 0;
393: }

395: /*@C
396:    MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix

398:    Logically Collective; the entire T is stored independently on all processes.

400:    Input Parameters:
401: +  A - the `MATKAIJ` matrix
402: .  p - the number of rows in S
403: .  q - the number of columns in S
404: -  T - the T matrix, in form of a scalar array in column-major format

406:    Notes:
407:    The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.

409:    The T matrix is copied, so the user can destroy this array.

411:    Level: Advanced

413: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
414: @*/
415: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
416: {
417:   PetscInt     i, j;
418:   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
419:   PetscBool    isTI = PETSC_FALSE;

421:   /* check if T is an identity matrix */
422:   if (T && (p == q)) {
423:     isTI = PETSC_TRUE;
424:     for (i = 0; i < p; i++) {
425:       for (j = 0; j < q; j++) {
426:         if (i == j) {
427:           /* diagonal term must be 1 */
428:           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
429:         } else {
430:           /* off-diagonal term must be 0 */
431:           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
432:         }
433:       }
434:     }
435:   }
436:   a->isTI = isTI;

438:   PetscFree(a->T);
439:   if (T && (!isTI)) {
440:     PetscMalloc1(p * q, &a->T);
441:     PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar));
442:   } else a->T = NULL;

444:   a->p = p;
445:   a->q = q;
446:   return 0;
447: }

449: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
450: {
451:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

453:   MatDestroy(&b->AIJ);
454:   PetscFree(b->S);
455:   PetscFree(b->T);
456:   PetscFree(b->ibdiag);
457:   PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr);
458:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL);
459:   PetscFree(A->data);
460:   return 0;
461: }

463: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
464: {
465:   Mat_MPIKAIJ     *a;
466:   Mat_MPIAIJ      *mpiaij;
467:   PetscScalar     *T;
468:   PetscInt         i, j;
469:   PetscObjectState state;

471:   a      = (Mat_MPIKAIJ *)A->data;
472:   mpiaij = (Mat_MPIAIJ *)a->A->data;

474:   PetscObjectStateGet((PetscObject)a->A, &state);
475:   if (state == a->state) {
476:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
477:     return 0;
478:   } else {
479:     MatDestroy(&a->AIJ);
480:     MatDestroy(&a->OAIJ);
481:     if (a->isTI) {
482:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
483:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
484:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
485:        * to pass in. */
486:       PetscMalloc1(a->p * a->q, &T);
487:       for (i = 0; i < a->p; i++) {
488:         for (j = 0; j < a->q; j++) {
489:           if (i == j) T[i + j * a->p] = 1.0;
490:           else T[i + j * a->p] = 0.0;
491:         }
492:       }
493:     } else T = a->T;
494:     MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ);
495:     MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ);
496:     if (a->isTI) PetscFree(T);
497:     a->state = state;
498:   }

500:   return 0;
501: }

503: PetscErrorCode MatSetUp_KAIJ(Mat A)
504: {
505:   PetscInt     n;
506:   PetscMPIInt  size;
507:   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

509:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
510:   if (size == 1) {
511:     MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N);
512:     PetscLayoutSetBlockSize(A->rmap, seqkaij->p);
513:     PetscLayoutSetBlockSize(A->cmap, seqkaij->q);
514:     PetscLayoutSetUp(A->rmap);
515:     PetscLayoutSetUp(A->cmap);
516:   } else {
517:     Mat_MPIKAIJ *a;
518:     Mat_MPIAIJ  *mpiaij;
519:     IS           from, to;
520:     Vec          gvec;

522:     a      = (Mat_MPIKAIJ *)A->data;
523:     mpiaij = (Mat_MPIAIJ *)a->A->data;
524:     MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N);
525:     PetscLayoutSetBlockSize(A->rmap, seqkaij->p);
526:     PetscLayoutSetBlockSize(A->cmap, seqkaij->q);
527:     PetscLayoutSetUp(A->rmap);
528:     PetscLayoutSetUp(A->cmap);

530:     MatKAIJ_build_AIJ_OAIJ(A);

532:     VecGetSize(mpiaij->lvec, &n);
533:     VecCreate(PETSC_COMM_SELF, &a->w);
534:     VecSetSizes(a->w, n * a->q, n * a->q);
535:     VecSetBlockSize(a->w, a->q);
536:     VecSetType(a->w, VECSEQ);

538:     /* create two temporary Index sets for build scatter gather */
539:     ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from);
540:     ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to);

542:     /* create temporary global vector to generate scatter context */
543:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec);

545:     /* generate the scatter context */
546:     VecScatterCreate(gvec, from, a->w, to, &a->ctx);

548:     ISDestroy(&from);
549:     ISDestroy(&to);
550:     VecDestroy(&gvec);
551:   }

553:   A->assembled = PETSC_TRUE;
554:   return 0;
555: }

557: PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
558: {
559:   PetscViewerFormat format;
560:   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
561:   Mat               B;
562:   PetscInt          i;
563:   PetscBool         ismpikaij;

565:   PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
566:   PetscViewerGetFormat(viewer, &format);
567:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
568:     PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q);

570:     /* Print appropriate details for S. */
571:     if (!a->S) {
572:       PetscViewerASCIIPrintf(viewer, "S is NULL\n");
573:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
574:       PetscViewerASCIIPrintf(viewer, "Entries of S are ");
575:       for (i = 0; i < (a->p * a->q); i++) {
576: #if defined(PETSC_USE_COMPLEX)
577:         PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]));
578: #else
579:         PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]));
580: #endif
581:       }
582:       PetscViewerASCIIPrintf(viewer, "\n");
583:     }

585:     /* Print appropriate details for T. */
586:     if (a->isTI) {
587:       PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n");
588:     } else if (!a->T) {
589:       PetscViewerASCIIPrintf(viewer, "T is NULL\n");
590:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
591:       PetscViewerASCIIPrintf(viewer, "Entries of T are ");
592:       for (i = 0; i < (a->p * a->q); i++) {
593: #if defined(PETSC_USE_COMPLEX)
594:         PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]));
595: #else
596:         PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]));
597: #endif
598:       }
599:       PetscViewerASCIIPrintf(viewer, "\n");
600:     }

602:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
603:     PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n");
604:     if (ismpikaij) {
605:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
606:       MatView(b->A, viewer);
607:     } else {
608:       MatView(a->AIJ, viewer);
609:     }

611:   } else {
612:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
613:     MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B);
614:     MatView(B, viewer);
615:     MatDestroy(&B);
616:   }
617:   return 0;
618: }

620: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
621: {
622:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

624:   MatDestroy(&b->AIJ);
625:   MatDestroy(&b->OAIJ);
626:   MatDestroy(&b->A);
627:   VecScatterDestroy(&b->ctx);
628:   VecDestroy(&b->w);
629:   PetscFree(b->S);
630:   PetscFree(b->T);
631:   PetscFree(b->ibdiag);
632:   PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL);
633:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL);
634:   PetscFree(A->data);
635:   return 0;
636: }

638: /* --------------------------------------------------------------------------------------*/

640: /* zz = yy + Axx */
641: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
642: {
643:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
644:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
645:   const PetscScalar *s = b->S, *t = b->T;
646:   const PetscScalar *x, *v, *bx;
647:   PetscScalar       *y, *sums;
648:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
649:   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

651:   if (!yy) {
652:     VecSet(zz, 0.0);
653:   } else {
654:     VecCopy(yy, zz);
655:   }
656:   if ((!s) && (!t) && (!b->isTI)) return 0;

658:   VecGetArrayRead(xx, &x);
659:   VecGetArray(zz, &y);
660:   idx = a->j;
661:   v   = a->a;
662:   ii  = a->i;

664:   if (b->isTI) {
665:     for (i = 0; i < m; i++) {
666:       jrow = ii[i];
667:       n    = ii[i + 1] - jrow;
668:       sums = y + p * i;
669:       for (j = 0; j < n; j++) {
670:         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
671:       }
672:     }
673:     PetscLogFlops(3.0 * (a->nz) * p);
674:   } else if (t) {
675:     for (i = 0; i < m; i++) {
676:       jrow = ii[i];
677:       n    = ii[i + 1] - jrow;
678:       sums = y + p * i;
679:       for (j = 0; j < n; j++) {
680:         for (k = 0; k < p; k++) {
681:           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
682:         }
683:       }
684:     }
685:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
686:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
687:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
688:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
689:     PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz);
690:   }
691:   if (s) {
692:     for (i = 0; i < m; i++) {
693:       sums = y + p * i;
694:       bx   = x + q * i;
695:       if (i < b->AIJ->cmap->n) {
696:         for (j = 0; j < q; j++) {
697:           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
698:         }
699:       }
700:     }
701:     PetscLogFlops(2.0 * m * p * q);
702:   }

704:   VecRestoreArrayRead(xx, &x);
705:   VecRestoreArray(zz, &y);
706:   return 0;
707: }

709: PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
710: {
711:   MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy);
712:   return 0;
713: }

715: #include <petsc/private/kernels/blockinvert.h>

717: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
718: {
719:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
720:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
721:   const PetscScalar *S = b->S;
722:   const PetscScalar *T = b->T;
723:   const PetscScalar *v = a->a;
724:   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
725:   PetscInt           i, j, *v_pivots, dof, dof2;
726:   PetscScalar       *diag, aval, *v_work;


731:   dof  = p;
732:   dof2 = dof * dof;

734:   if (b->ibdiagvalid) {
735:     if (values) *values = b->ibdiag;
736:     return 0;
737:   }
738:   if (!b->ibdiag) { PetscMalloc1(dof2 * m, &b->ibdiag); }
739:   if (values) *values = b->ibdiag;
740:   diag = b->ibdiag;

742:   PetscMalloc2(dof, &v_work, dof, &v_pivots);
743:   for (i = 0; i < m; i++) {
744:     if (S) {
745:       PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar));
746:     } else {
747:       PetscMemzero(diag, dof2 * sizeof(PetscScalar));
748:     }
749:     if (b->isTI) {
750:       aval = 0;
751:       for (j = ii[i]; j < ii[i + 1]; j++)
752:         if (idx[j] == i) aval = v[j];
753:       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
754:     } else if (T) {
755:       aval = 0;
756:       for (j = ii[i]; j < ii[i + 1]; j++)
757:         if (idx[j] == i) aval = v[j];
758:       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
759:     }
760:     PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL);
761:     diag += dof2;
762:   }
763:   PetscFree2(v_work, v_pivots);

765:   b->ibdiagvalid = PETSC_TRUE;
766:   return 0;
767: }

769: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
770: {
771:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

773:   *B = kaij->AIJ;
774:   return 0;
775: }

777: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
778: {
779:   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
780:   Mat            AIJ, OAIJ, B;
781:   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
782:   const PetscInt p = a->p, q = a->q;
783:   PetscBool      ismpikaij, missing;

785:   if (reuse != MAT_REUSE_MATRIX) {
786:     PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
787:     if (ismpikaij) {
788:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
789:       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
790:       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
791:     } else {
792:       AIJ  = a->AIJ;
793:       OAIJ = NULL;
794:     }
795:     MatCreate(PetscObjectComm((PetscObject)A), &B);
796:     MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
797:     MatSetType(B, MATAIJ);
798:     MatGetSize(AIJ, &m, NULL);
799:     MatMissingDiagonal(AIJ, &missing, &d); /* assumption that all successive rows will have a missing diagonal */
800:     if (!missing || !a->S) d = m;
801:     PetscMalloc1(m * p, &d_nnz);
802:     for (i = 0; i < m; ++i) {
803:       MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL);
804:       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
805:       MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL);
806:     }
807:     if (OAIJ) {
808:       PetscMalloc1(m * p, &o_nnz);
809:       for (i = 0; i < m; ++i) {
810:         MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL);
811:         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
812:         MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL);
813:       }
814:       MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
815:     } else {
816:       MatSeqAIJSetPreallocation(B, 0, d_nnz);
817:     }
818:     PetscFree(d_nnz);
819:     PetscFree(o_nnz);
820:   } else B = *newmat;
821:   MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B);
822:   if (reuse == MAT_INPLACE_MATRIX) {
823:     MatHeaderReplace(A, &B);
824:   } else *newmat = B;
825:   return 0;
826: }

828: PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
829: {
830:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
831:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
832:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
833:   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
834:   const PetscScalar *b, *xb, *idiag;
835:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
836:   PetscInt           i, j, k, i2, bs, bs2, nz;

838:   its = its * lits;
844:   bs  = p;
845:   bs2 = bs * bs;

847:   if (!m) return 0;

849:   if (!kaij->ibdiagvalid) MatInvertBlockDiagonal_SeqKAIJ(A, NULL);
850:   idiag = kaij->ibdiag;
851:   diag  = a->diag;

853:   if (!kaij->sor.setup) {
854:     PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr);
855:     kaij->sor.setup = PETSC_TRUE;
856:   }
857:   y    = kaij->sor.y;
858:   w    = kaij->sor.w;
859:   work = kaij->sor.work;
860:   t    = kaij->sor.t;
861:   arr  = kaij->sor.arr;

863:   VecGetArray(xx, &x);
864:   VecGetArrayRead(bb, &b);

866:   if (flag & SOR_ZERO_INITIAL_GUESS) {
867:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
868:       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
869:       PetscMemcpy(t, b, bs * sizeof(PetscScalar));
870:       i2 = bs;
871:       idiag += bs2;
872:       for (i = 1; i < m; i++) {
873:         v  = aa + ai[i];
874:         vi = aj + ai[i];
875:         nz = diag[i] - ai[i];

877:         if (T) { /* b - T (Arow * x) */
878:           PetscMemzero(w, bs * sizeof(PetscScalar));
879:           for (j = 0; j < nz; j++) {
880:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
881:           }
882:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
883:           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
884:         } else if (kaij->isTI) {
885:           PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar));
886:           for (j = 0; j < nz; j++) {
887:             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
888:           }
889:         } else {
890:           PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar));
891:         }

893:         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
894:         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

896:         idiag += bs2;
897:         i2 += bs;
898:       }
899:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
900:       PetscLogFlops(1.0 * bs2 * a->nz);
901:       xb = t;
902:     } else xb = b;
903:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904:       idiag = kaij->ibdiag + bs2 * (m - 1);
905:       i2    = bs * (m - 1);
906:       PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
907:       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
908:       i2 -= bs;
909:       idiag -= bs2;
910:       for (i = m - 2; i >= 0; i--) {
911:         v  = aa + diag[i] + 1;
912:         vi = aj + diag[i] + 1;
913:         nz = ai[i + 1] - diag[i] - 1;

915:         if (T) { /* FIXME: This branch untested */
916:           PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
917:           /* copy all rows of x that are needed into contiguous space */
918:           workt = work;
919:           for (j = 0; j < nz; j++) {
920:             PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
921:             workt += bs;
922:           }
923:           arrt = arr;
924:           for (j = 0; j < nz; j++) {
925:             PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
926:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
927:             arrt += bs2;
928:           }
929:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
930:         } else if (kaij->isTI) {
931:           PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar));
932:           for (j = 0; j < nz; j++) {
933:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
934:           }
935:         }

937:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
938:         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

940:         idiag -= bs2;
941:         i2 -= bs;
942:       }
943:       PetscLogFlops(1.0 * bs2 * (a->nz));
944:     }
945:     its--;
946:   }
947:   while (its--) { /* FIXME: This branch not updated */
948:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
949:       i2    = 0;
950:       idiag = kaij->ibdiag;
951:       for (i = 0; i < m; i++) {
952:         PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar));

954:         v     = aa + ai[i];
955:         vi    = aj + ai[i];
956:         nz    = diag[i] - ai[i];
957:         workt = work;
958:         for (j = 0; j < nz; j++) {
959:           PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
960:           workt += bs;
961:         }
962:         arrt = arr;
963:         if (T) {
964:           for (j = 0; j < nz; j++) {
965:             PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
966:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
967:             arrt += bs2;
968:           }
969:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
970:         } else if (kaij->isTI) {
971:           for (j = 0; j < nz; j++) {
972:             PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
973:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
974:             arrt += bs2;
975:           }
976:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
977:         }
978:         PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar));

980:         v     = aa + diag[i] + 1;
981:         vi    = aj + diag[i] + 1;
982:         nz    = ai[i + 1] - diag[i] - 1;
983:         workt = work;
984:         for (j = 0; j < nz; j++) {
985:           PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
986:           workt += bs;
987:         }
988:         arrt = arr;
989:         if (T) {
990:           for (j = 0; j < nz; j++) {
991:             PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
992:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
993:             arrt += bs2;
994:           }
995:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
996:         } else if (kaij->isTI) {
997:           for (j = 0; j < nz; j++) {
998:             PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
999:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1000:             arrt += bs2;
1001:           }
1002:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1003:         }

1005:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1006:         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

1008:         idiag += bs2;
1009:         i2 += bs;
1010:       }
1011:       xb = t;
1012:     } else xb = b;
1013:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1014:       idiag = kaij->ibdiag + bs2 * (m - 1);
1015:       i2    = bs * (m - 1);
1016:       if (xb == b) {
1017:         for (i = m - 1; i >= 0; i--) {
1018:           PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar));

1020:           v     = aa + ai[i];
1021:           vi    = aj + ai[i];
1022:           nz    = diag[i] - ai[i];
1023:           workt = work;
1024:           for (j = 0; j < nz; j++) {
1025:             PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1026:             workt += bs;
1027:           }
1028:           arrt = arr;
1029:           if (T) {
1030:             for (j = 0; j < nz; j++) {
1031:               PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1032:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1033:               arrt += bs2;
1034:             }
1035:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1036:           } else if (kaij->isTI) {
1037:             for (j = 0; j < nz; j++) {
1038:               PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1039:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1040:               arrt += bs2;
1041:             }
1042:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1043:           }

1045:           v     = aa + diag[i] + 1;
1046:           vi    = aj + diag[i] + 1;
1047:           nz    = ai[i + 1] - diag[i] - 1;
1048:           workt = work;
1049:           for (j = 0; j < nz; j++) {
1050:             PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1051:             workt += bs;
1052:           }
1053:           arrt = arr;
1054:           if (T) {
1055:             for (j = 0; j < nz; j++) {
1056:               PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1057:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1058:               arrt += bs2;
1059:             }
1060:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1061:           } else if (kaij->isTI) {
1062:             for (j = 0; j < nz; j++) {
1063:               PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1064:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1065:               arrt += bs2;
1066:             }
1067:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1068:           }

1070:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1071:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1072:         }
1073:       } else {
1074:         for (i = m - 1; i >= 0; i--) {
1075:           PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
1076:           v     = aa + diag[i] + 1;
1077:           vi    = aj + diag[i] + 1;
1078:           nz    = ai[i + 1] - diag[i] - 1;
1079:           workt = work;
1080:           for (j = 0; j < nz; j++) {
1081:             PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1082:             workt += bs;
1083:           }
1084:           arrt = arr;
1085:           if (T) {
1086:             for (j = 0; j < nz; j++) {
1087:               PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1088:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1089:               arrt += bs2;
1090:             }
1091:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1092:           } else if (kaij->isTI) {
1093:             for (j = 0; j < nz; j++) {
1094:               PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1095:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1096:               arrt += bs2;
1097:             }
1098:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1099:           }
1100:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1101:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1102:         }
1103:       }
1104:       PetscLogFlops(1.0 * bs2 * (a->nz));
1105:     }
1106:   }

1108:   VecRestoreArray(xx, &x);
1109:   VecRestoreArrayRead(bb, &b);
1110:   return 0;
1111: }

1113: /*===================================================================================*/

1115: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1116: {
1117:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1119:   if (!yy) {
1120:     VecSet(zz, 0.0);
1121:   } else {
1122:     VecCopy(yy, zz);
1123:   }
1124:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1125:   /* start the scatter */
1126:   VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
1127:   (*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz);
1128:   VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
1129:   (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz);
1130:   return 0;
1131: }

1133: PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1134: {
1135:   MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy);
1136:   return 0;
1137: }

1139: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1140: {
1141:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1143:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1144:   (*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values);
1145:   return 0;
1146: }

1148: /* ----------------------------------------------------------------*/

1150: PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1151: {
1152:   Mat_SeqKAIJ   *b    = (Mat_SeqKAIJ *)A->data;
1153:   PetscErrorCode diag = PETSC_FALSE;
1154:   PetscInt       nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1155:   PetscScalar   *vaij, *v, *S = b->S, *T = b->T;

1158:   b->getrowactive = PETSC_TRUE;

1161:   if ((!S) && (!T) && (!b->isTI)) {
1162:     if (ncols) *ncols = 0;
1163:     if (cols) *cols = NULL;
1164:     if (values) *values = NULL;
1165:     return 0;
1166:   }

1168:   if (T || b->isTI) {
1169:     MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij);
1170:     c = nzaij;
1171:     for (i = 0; i < nzaij; i++) {
1172:       /* check if this row contains a diagonal entry */
1173:       if (colsaij[i] == r) {
1174:         diag = PETSC_TRUE;
1175:         c    = i;
1176:       }
1177:     }
1178:   } else nzaij = c = 0;

1180:   /* calculate size of row */
1181:   nz = 0;
1182:   if (S) nz += q;
1183:   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

1185:   if (cols || values) {
1186:     PetscMalloc2(nz, &idx, nz, &v);
1187:     for (i = 0; i < q; i++) {
1188:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1189:       v[i] = 0.0;
1190:     }
1191:     if (b->isTI) {
1192:       for (i = 0; i < nzaij; i++) {
1193:         for (j = 0; j < q; j++) {
1194:           idx[i * q + j] = colsaij[i] * q + j;
1195:           v[i * q + j]   = (j == s ? vaij[i] : 0);
1196:         }
1197:       }
1198:     } else if (T) {
1199:       for (i = 0; i < nzaij; i++) {
1200:         for (j = 0; j < q; j++) {
1201:           idx[i * q + j] = colsaij[i] * q + j;
1202:           v[i * q + j]   = vaij[i] * T[s + j * p];
1203:         }
1204:       }
1205:     }
1206:     if (S) {
1207:       for (j = 0; j < q; j++) {
1208:         idx[c * q + j] = r * q + j;
1209:         v[c * q + j] += S[s + j * p];
1210:       }
1211:     }
1212:   }

1214:   if (ncols) *ncols = nz;
1215:   if (cols) *cols = idx;
1216:   if (values) *values = v;
1217:   return 0;
1218: }

1220: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1221: {
1222:   if (nz) *nz = 0;
1223:   PetscFree2(*idx, *v);
1224:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1225:   return 0;
1226: }

1228: PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1229: {
1230:   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1231:   Mat            AIJ  = b->A;
1232:   PetscBool      diag = PETSC_FALSE;
1233:   Mat            MatAIJ, MatOAIJ;
1234:   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1235:   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1236:   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

1238:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1239:   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1240:   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1242:   b->getrowactive = PETSC_TRUE;
1244:   lrow = row - rstart;

1246:   if ((!S) && (!T) && (!b->isTI)) {
1247:     if (ncols) *ncols = 0;
1248:     if (cols) *cols = NULL;
1249:     if (values) *values = NULL;
1250:     return 0;
1251:   }

1253:   r = lrow / p;
1254:   s = lrow % p;

1256:   if (T || b->isTI) {
1257:     MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray);
1258:     MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals);
1259:     MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals);

1261:     c = ncolsaij + ncolsoaij;
1262:     for (i = 0; i < ncolsaij; i++) {
1263:       /* check if this row contains a diagonal entry */
1264:       if (colsaij[i] == r) {
1265:         diag = PETSC_TRUE;
1266:         c    = i;
1267:       }
1268:     }
1269:   } else c = 0;

1271:   /* calculate size of row */
1272:   nz = 0;
1273:   if (S) nz += q;
1274:   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

1276:   if (cols || values) {
1277:     PetscMalloc2(nz, &idx, nz, &v);
1278:     for (i = 0; i < q; i++) {
1279:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1280:       v[i] = 0.0;
1281:     }
1282:     if (b->isTI) {
1283:       for (i = 0; i < ncolsaij; i++) {
1284:         for (j = 0; j < q; j++) {
1285:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1286:           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1287:         }
1288:       }
1289:       for (i = 0; i < ncolsoaij; i++) {
1290:         for (j = 0; j < q; j++) {
1291:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1292:           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1293:         }
1294:       }
1295:     } else if (T) {
1296:       for (i = 0; i < ncolsaij; i++) {
1297:         for (j = 0; j < q; j++) {
1298:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1299:           v[i * q + j]   = vals[i] * T[s + j * p];
1300:         }
1301:       }
1302:       for (i = 0; i < ncolsoaij; i++) {
1303:         for (j = 0; j < q; j++) {
1304:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1305:           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1306:         }
1307:       }
1308:     }
1309:     if (S) {
1310:       for (j = 0; j < q; j++) {
1311:         idx[c * q + j] = (r + rstart / p) * q + j;
1312:         v[c * q + j] += S[s + j * p];
1313:       }
1314:     }
1315:   }

1317:   if (ncols) *ncols = nz;
1318:   if (cols) *cols = idx;
1319:   if (values) *values = v;
1320:   return 0;
1321: }

1323: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1324: {
1325:   PetscFree2(*idx, *v);
1326:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1327:   return 0;
1328: }

1330: PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1331: {
1332:   Mat A;

1334:   MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
1335:   MatCreateSubMatrix(A, isrow, iscol, cll, newmat);
1336:   MatDestroy(&A);
1337:   return 0;
1338: }

1340: /* ---------------------------------------------------------------------------------- */
1341: /*@C
1342:   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form:

1344:     [I \otimes S + A \otimes T]

1346:   where
1347:     S is a dense (p \times q) matrix
1348:     T is a dense (p \times q) matrix
1349:     A is an `MATAIJ`  (n \times n) matrix
1350:     I is the identity matrix
1351:   The resulting matrix is (np \times nq)

1353:   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1355:   Collective

1357:   Input Parameters:
1358: + A - the `MATAIJ` matrix
1359: . p - number of rows in S and T
1360: . q - number of columns in S and T
1361: . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1362: - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)

1364:   Output Parameter:
1365: . kaij - the new `MATKAIJ` matrix

1367:   Notes:
1368:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

1370:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

1372:   Developer Note:
1373:   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1374:   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1375:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1376:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1378:   Level: advanced

1380: .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1381: @*/
1382: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1383: {
1384:   MatCreate(PetscObjectComm((PetscObject)A), kaij);
1385:   MatSetType(*kaij, MATKAIJ);
1386:   MatKAIJSetAIJ(*kaij, A);
1387:   MatKAIJSetS(*kaij, p, q, S);
1388:   MatKAIJSetT(*kaij, p, q, T);
1389:   MatSetUp(*kaij);
1390:   return 0;
1391: }

1393: /*MC
1394:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1395:     [I \otimes S + A \otimes T],
1396:   where
1397:     S is a dense (p \times q) matrix,
1398:     T is a dense (p \times q) matrix,
1399:     A is an AIJ  (n \times n) matrix,
1400:     and I is the identity matrix.
1401:   The resulting matrix is (np \times nq).

1403:   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1405:   Note:
1406:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1407:   where x and b are column vectors containing the row-major representations of X and B.

1409:   Level: advanced

1411: .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1412: M*/

1414: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1415: {
1416:   Mat_MPIKAIJ *b;
1417:   PetscMPIInt  size;

1419:   PetscNew(&b);
1420:   A->data = (void *)b;

1422:   PetscMemzero(A->ops, sizeof(struct _MatOps));

1424:   b->w = NULL;
1425:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
1426:   if (size == 1) {
1427:     PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ);
1428:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1429:     A->ops->mult                = MatMult_SeqKAIJ;
1430:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1431:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1432:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1433:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1434:     A->ops->sor                 = MatSOR_SeqKAIJ;
1435:     PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ);
1436:   } else {
1437:     PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ);
1438:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1439:     A->ops->mult                = MatMult_MPIKAIJ;
1440:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1441:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1442:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1443:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1444:     PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ);
1445:     PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ);
1446:   }
1447:   A->ops->setup           = MatSetUp_KAIJ;
1448:   A->ops->view            = MatView_KAIJ;
1449:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1450:   return 0;
1451: }