Actual source code: kaij.c

  1: /*
  2:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  3:   This format is used to evaluate matrices of the form:

  5:     [I \otimes S + A \otimes T]

  7:   where
  8:     S is a dense (p \times q) matrix
  9:     T is a dense (p \times q) matrix
 10:     A is an AIJ  (n \times n) matrix
 11:     I is the identity matrix

 13:   The resulting matrix is (np \times nq)

 15:   We provide:
 16:      MatMult()
 17:      MatMultAdd()
 18:      MatInvertBlockDiagonal()
 19:   and
 20:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 22:   This single directory handles both the sequential and parallel codes
 23: */

 25: #include <../src/mat/impls/kaij/kaij.h>
 26: #include <../src/mat/utils/freespace.h>
 27: #include <petsc/private/vecimpl.h>

 29: /*@
 30:   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

 32:   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 34:   Input Parameter:
 35: . A - the `MATKAIJ` matrix

 37:   Output Parameter:
 38: . B - the `MATAIJ` matrix

 40:   Level: advanced

 42:   Note:
 43:   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

 45: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
 46: @*/
 47: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
 48: {
 49:   PetscBool ismpikaij, isseqkaij;

 51:   PetscFunctionBegin;
 52:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
 53:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
 54:   if (ismpikaij) {
 55:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

 57:     *B = b->A;
 58:   } else if (isseqkaij) {
 59:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

 61:     *B = b->AIJ;
 62:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: /*@C
 67:   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix

 69:   Not Collective; the entire `S` is stored and returned independently on all processes.

 71:   Input Parameter:
 72: . A - the `MATKAIJ` matrix

 74:   Output Parameters:
 75: + m - the number of rows in `S`
 76: . n - the number of columns in `S`
 77: - S - the S matrix, in form of a scalar array in column-major format

 79:   Level: advanced

 81:   Note:
 82:   All output parameters are optional (pass `NULL` if not desired)

 84: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[])
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

 90:   PetscFunctionBegin;
 91:   if (m) *m = b->p;
 92:   if (n) *n = b->q;
 93:   if (S) *S = b->S;
 94:   PetscFunctionReturn(PETSC_SUCCESS);
 95: }

 97: /*@C
 98:   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix

100:   Not Collective; the entire `S` is stored and returned independently on all processes.

102:   Input Parameter:
103: . A - the `MATKAIJ` matrix

105:   Output Parameters:
106: + m - the number of rows in `S`
107: . n - the number of columns in `S`
108: - S - the S matrix, in form of a scalar array in column-major format

110:   Level: advanced

112:   Note:
113:   All output parameters are optional (pass `NULL` if not desired)

115: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116: @*/
117: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[])
118: {
119:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

121:   PetscFunctionBegin;
122:   if (m) *m = b->p;
123:   if (n) *n = b->q;
124:   if (S) *S = b->S;
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: /*@C
129:   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

131:   Not Collective

133:   Input Parameters:
134: + A - the `MATKAIJ` matrix
135: - S - location of pointer to array obtained with `MatKAIJGetS()`

137:   Level: advanced

139:   Note:
140:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
141:   If `NULL` is passed, it will not attempt to zero the array pointer.

143: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
144: @*/
145: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[])
146: {
147:   PetscFunctionBegin;
148:   if (S) *S = NULL;
149:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
150:   PetscFunctionReturn(PETSC_SUCCESS);
151: }

153: /*@C
154:   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

156:   Not Collective

158:   Input Parameters:
159: + A - the `MATKAIJ` matrix
160: - S - location of pointer to array obtained with `MatKAIJGetSRead()`

162:   Level: advanced

164:   Note:
165:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
166:   If `NULL` is passed, it will not attempt to zero the array pointer.

168: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
169: @*/
170: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[])
171: {
172:   PetscFunctionBegin;
173:   if (S) *S = NULL;
174:   PetscFunctionReturn(PETSC_SUCCESS);
175: }

177: /*@C
178:   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix

180:   Not Collective; the entire `T` is stored and returned independently on all processes

182:   Input Parameter:
183: . A - the `MATKAIJ` matrix

185:   Output Parameters:
186: + m - the number of rows in `T`
187: . n - the number of columns in `T`
188: - T - the T matrix, in form of a scalar array in column-major format

190:   Level: advanced

192:   Note:
193:   All output parameters are optional (pass `NULL` if not desired)

195: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
196: @*/
197: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[])
198: {
199:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

201:   PetscFunctionBegin;
202:   if (m) *m = b->p;
203:   if (n) *n = b->q;
204:   if (T) *T = b->T;
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: /*@C
209:   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix

211:   Not Collective; the entire `T` is stored and returned independently on all processes

213:   Input Parameter:
214: . A - the `MATKAIJ` matrix

216:   Output Parameters:
217: + m - the number of rows in `T`
218: . n - the number of columns in `T`
219: - T - the T matrix, in form of a scalar array in column-major format

221:   Level: advanced

223:   Note:
224:   All output parameters are optional (pass `NULL` if not desired)

226: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
227: @*/
228: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[])
229: {
230:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

232:   PetscFunctionBegin;
233:   if (m) *m = b->p;
234:   if (n) *n = b->q;
235:   if (T) *T = b->T;
236:   PetscFunctionReturn(PETSC_SUCCESS);
237: }

239: /*@C
240:   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

242:   Not Collective

244:   Input Parameters:
245: + A - the `MATKAIJ` matrix
246: - T - location of pointer to array obtained with `MatKAIJGetT()`

248:   Level: advanced

250:   Note:
251:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
252:   If `NULL` is passed, it will not attempt to zero the array pointer.

254: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
255: @*/
256: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[])
257: {
258:   PetscFunctionBegin;
259:   if (T) *T = NULL;
260:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
261:   PetscFunctionReturn(PETSC_SUCCESS);
262: }

264: /*@C
265:   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

267:   Not Collective

269:   Input Parameters:
270: + A - the `MATKAIJ` matrix
271: - T - location of pointer to array obtained with `MatKAIJGetTRead()`

273:   Level: advanced

275:   Note:
276:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
277:   If `NULL` is passed, it will not attempt to zero the array pointer.

279: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
280: @*/
281: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[])
282: {
283:   PetscFunctionBegin;
284:   if (T) *T = NULL;
285:   PetscFunctionReturn(PETSC_SUCCESS);
286: }

288: /*@
289:   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

291:   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

293:   Input Parameters:
294: + A - the `MATKAIJ` matrix
295: - B - the `MATAIJ` matrix

297:   Level: advanced

299:   Notes:
300:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

302:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

304: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
305: @*/
306: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
307: {
308:   PetscMPIInt size;
309:   PetscBool   flg;

311:   PetscFunctionBegin;
312:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
313:   if (size == 1) {
314:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
315:     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
316:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
317:     a->AIJ         = B;
318:   } else {
319:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
320:     a->A           = B;
321:   }
322:   PetscCall(PetscObjectReference((PetscObject)B));
323:   PetscFunctionReturn(PETSC_SUCCESS);
324: }

326: /*@
327:   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix

329:   Logically Collective; the entire `S` is stored independently on all processes.

331:   Input Parameters:
332: + A - the `MATKAIJ` matrix
333: . p - the number of rows in `S`
334: . q - the number of columns in `S`
335: - S - the S matrix, in form of a scalar array in column-major format

337:   Level: advanced

339:   Notes:
340:   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.

342:   The `S` matrix is copied, so the user can destroy this array.

344: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
345: @*/
346: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
347: {
348:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

350:   PetscFunctionBegin;
351:   PetscCall(PetscFree(a->S));
352:   if (S) {
353:     PetscCall(PetscMalloc1(p * q, &a->S));
354:     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
355:   } else a->S = NULL;

357:   a->p = p;
358:   a->q = q;
359:   PetscFunctionReturn(PETSC_SUCCESS);
360: }

362: /*@
363:   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.

365:   Logically Collective.

367:   Input Parameter:
368: . A - the `MATKAIJ` matrix

370:   Output Parameter:
371: . identity - the Boolean value

373:   Level: advanced

375: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
376: @*/
377: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
378: {
379:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
380:   PetscInt     i, j;

382:   PetscFunctionBegin;
383:   if (a->p != a->q) {
384:     *identity = PETSC_FALSE;
385:     PetscFunctionReturn(PETSC_SUCCESS);
386:   } else *identity = PETSC_TRUE;
387:   if (!a->isTI || a->S) {
388:     for (i = 0; i < a->p && *identity; i++) {
389:       for (j = 0; j < a->p && *identity; j++) {
390:         if (i != j) {
391:           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
392:           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
393:         } else {
394:           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
395:           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
396:         }
397:       }
398:     }
399:   }
400:   PetscFunctionReturn(PETSC_SUCCESS);
401: }

403: /*@
404:   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix

406:   Logically Collective; the entire `T` is stored independently on all processes.

408:   Input Parameters:
409: + A - the `MATKAIJ` matrix
410: . p - the number of rows in `T`
411: . q - the number of columns in `T`
412: - T - the `T` matrix, in form of a scalar array in column-major format

414:   Level: advanced

416:   Notes:
417:   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.

419:   The `T` matrix is copied, so the user can destroy this array.

421: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
422: @*/
423: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
424: {
425:   PetscInt     i, j;
426:   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
427:   PetscBool    isTI = PETSC_FALSE;

429:   PetscFunctionBegin;
430:   /* check if T is an identity matrix */
431:   if (T && (p == q)) {
432:     isTI = PETSC_TRUE;
433:     for (i = 0; i < p; i++) {
434:       for (j = 0; j < q; j++) {
435:         if (i == j) {
436:           /* diagonal term must be 1 */
437:           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
438:         } else {
439:           /* off-diagonal term must be 0 */
440:           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
441:         }
442:       }
443:     }
444:   }
445:   a->isTI = isTI;

447:   PetscCall(PetscFree(a->T));
448:   if (T && (!isTI)) {
449:     PetscCall(PetscMalloc1(p * q, &a->T));
450:     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
451:   } else a->T = NULL;

453:   a->p = p;
454:   a->q = q;
455:   PetscFunctionReturn(PETSC_SUCCESS);
456: }

458: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
459: {
460:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

462:   PetscFunctionBegin;
463:   PetscCall(MatDestroy(&b->AIJ));
464:   PetscCall(PetscFree(b->S));
465:   PetscCall(PetscFree(b->T));
466:   PetscCall(PetscFree(b->ibdiag));
467:   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
468:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
469:   PetscCall(PetscFree(A->data));
470:   PetscFunctionReturn(PETSC_SUCCESS);
471: }

473: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
474: {
475:   Mat_MPIKAIJ     *a;
476:   Mat_MPIAIJ      *mpiaij;
477:   PetscScalar     *T;
478:   PetscInt         i, j;
479:   PetscObjectState state;

481:   PetscFunctionBegin;
482:   a      = (Mat_MPIKAIJ *)A->data;
483:   mpiaij = (Mat_MPIAIJ *)a->A->data;

485:   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
486:   if (state == a->state) {
487:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
488:     PetscFunctionReturn(PETSC_SUCCESS);
489:   } else {
490:     PetscCall(MatDestroy(&a->AIJ));
491:     PetscCall(MatDestroy(&a->OAIJ));
492:     if (a->isTI) {
493:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
494:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
495:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
496:        * to pass in. */
497:       PetscCall(PetscMalloc1(a->p * a->q, &T));
498:       for (i = 0; i < a->p; i++) {
499:         for (j = 0; j < a->q; j++) {
500:           if (i == j) T[i + j * a->p] = 1.0;
501:           else T[i + j * a->p] = 0.0;
502:         }
503:       }
504:     } else T = a->T;
505:     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
506:     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
507:     if (a->isTI) PetscCall(PetscFree(T));
508:     a->state = state;
509:   }
510:   PetscFunctionReturn(PETSC_SUCCESS);
511: }

513: static PetscErrorCode MatSetUp_KAIJ(Mat A)
514: {
515:   PetscInt     n;
516:   PetscMPIInt  size;
517:   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

519:   PetscFunctionBegin;
520:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
521:   if (size == 1) {
522:     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
523:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
524:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
525:     PetscCall(PetscLayoutSetUp(A->rmap));
526:     PetscCall(PetscLayoutSetUp(A->cmap));
527:   } else {
528:     Mat_MPIKAIJ *a;
529:     Mat_MPIAIJ  *mpiaij;
530:     IS           from, to;
531:     Vec          gvec;

533:     a      = (Mat_MPIKAIJ *)A->data;
534:     mpiaij = (Mat_MPIAIJ *)a->A->data;
535:     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
536:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
537:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
538:     PetscCall(PetscLayoutSetUp(A->rmap));
539:     PetscCall(PetscLayoutSetUp(A->cmap));

541:     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

543:     PetscCall(VecGetSize(mpiaij->lvec, &n));
544:     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
545:     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
546:     PetscCall(VecSetBlockSize(a->w, a->q));
547:     PetscCall(VecSetType(a->w, VECSEQ));

549:     /* create two temporary Index sets for build scatter gather */
550:     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
551:     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

553:     /* create temporary global vector to generate scatter context */
554:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

556:     /* generate the scatter context */
557:     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

559:     PetscCall(ISDestroy(&from));
560:     PetscCall(ISDestroy(&to));
561:     PetscCall(VecDestroy(&gvec));
562:   }

564:   A->assembled = PETSC_TRUE;
565:   PetscFunctionReturn(PETSC_SUCCESS);
566: }

568: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
569: {
570:   PetscViewerFormat format;
571:   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
572:   Mat               B;
573:   PetscInt          i;
574:   PetscBool         ismpikaij;

576:   PetscFunctionBegin;
577:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
578:   PetscCall(PetscViewerGetFormat(viewer, &format));
579:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
580:     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

582:     /* Print appropriate details for S. */
583:     if (!a->S) {
584:       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
585:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
586:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
587:       for (i = 0; i < (a->p * a->q); i++) {
588: #if defined(PETSC_USE_COMPLEX)
589:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
590: #else
591:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
592: #endif
593:       }
594:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
595:     }

597:     /* Print appropriate details for T. */
598:     if (a->isTI) {
599:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
600:     } else if (!a->T) {
601:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
602:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
603:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
604:       for (i = 0; i < (a->p * a->q); i++) {
605: #if defined(PETSC_USE_COMPLEX)
606:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
607: #else
608:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
609: #endif
610:       }
611:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
612:     }

614:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
615:     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
616:     if (ismpikaij) {
617:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
618:       PetscCall(MatView(b->A, viewer));
619:     } else {
620:       PetscCall(MatView(a->AIJ, viewer));
621:     }

623:   } else {
624:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
625:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
626:     PetscCall(MatView(B, viewer));
627:     PetscCall(MatDestroy(&B));
628:   }
629:   PetscFunctionReturn(PETSC_SUCCESS);
630: }

632: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
633: {
634:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

636:   PetscFunctionBegin;
637:   PetscCall(MatDestroy(&b->AIJ));
638:   PetscCall(MatDestroy(&b->OAIJ));
639:   PetscCall(MatDestroy(&b->A));
640:   PetscCall(VecScatterDestroy(&b->ctx));
641:   PetscCall(VecDestroy(&b->w));
642:   PetscCall(PetscFree(b->S));
643:   PetscCall(PetscFree(b->T));
644:   PetscCall(PetscFree(b->ibdiag));
645:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
646:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
647:   PetscCall(PetscFree(A->data));
648:   PetscFunctionReturn(PETSC_SUCCESS);
649: }

651: /* zz = yy + Axx */
652: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
653: {
654:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
655:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656:   const PetscScalar *s = b->S, *t = b->T;
657:   const PetscScalar *x, *v, *bx;
658:   PetscScalar       *y, *sums;
659:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
660:   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

662:   PetscFunctionBegin;
663:   if (!yy) {
664:     PetscCall(VecSet(zz, 0.0));
665:   } else {
666:     PetscCall(VecCopy(yy, zz));
667:   }
668:   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

670:   PetscCall(VecGetArrayRead(xx, &x));
671:   PetscCall(VecGetArray(zz, &y));
672:   idx = a->j;
673:   v   = a->a;
674:   ii  = a->i;

676:   if (b->isTI) {
677:     for (i = 0; i < m; i++) {
678:       jrow = ii[i];
679:       n    = ii[i + 1] - jrow;
680:       sums = y + p * i;
681:       for (j = 0; j < n; j++) {
682:         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
683:       }
684:     }
685:     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
686:   } else if (t) {
687:     for (i = 0; i < m; i++) {
688:       jrow = ii[i];
689:       n    = ii[i + 1] - jrow;
690:       sums = y + p * i;
691:       for (j = 0; j < n; j++) {
692:         for (k = 0; k < p; k++) {
693:           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
694:         }
695:       }
696:     }
697:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
698:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
699:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
700:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
701:     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
702:   }
703:   if (s) {
704:     for (i = 0; i < m; i++) {
705:       sums = y + p * i;
706:       bx   = x + q * i;
707:       if (i < b->AIJ->cmap->n) {
708:         for (j = 0; j < q; j++) {
709:           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
710:         }
711:       }
712:     }
713:     PetscCall(PetscLogFlops(2.0 * m * p * q));
714:   }

716:   PetscCall(VecRestoreArrayRead(xx, &x));
717:   PetscCall(VecRestoreArray(zz, &y));
718:   PetscFunctionReturn(PETSC_SUCCESS);
719: }

721: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
722: {
723:   PetscFunctionBegin;
724:   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
725:   PetscFunctionReturn(PETSC_SUCCESS);
726: }

728: #include <petsc/private/kernels/blockinvert.h>

730: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
731: {
732:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
733:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
734:   const PetscScalar *S = b->S;
735:   const PetscScalar *T = b->T;
736:   const PetscScalar *v = a->a;
737:   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
738:   PetscInt           i, j, *v_pivots, dof, dof2;
739:   PetscScalar       *diag, aval, *v_work;

741:   PetscFunctionBegin;
742:   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
743:   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

745:   dof  = p;
746:   dof2 = dof * dof;

748:   if (b->ibdiagvalid) {
749:     if (values) *values = b->ibdiag;
750:     PetscFunctionReturn(PETSC_SUCCESS);
751:   }
752:   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
753:   if (values) *values = b->ibdiag;
754:   diag = b->ibdiag;

756:   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
757:   for (i = 0; i < m; i++) {
758:     if (S) {
759:       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
760:     } else {
761:       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
762:     }
763:     if (b->isTI) {
764:       aval = 0;
765:       for (j = ii[i]; j < ii[i + 1]; j++)
766:         if (idx[j] == i) aval = v[j];
767:       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
768:     } else if (T) {
769:       aval = 0;
770:       for (j = ii[i]; j < ii[i + 1]; j++)
771:         if (idx[j] == i) aval = v[j];
772:       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
773:     }
774:     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
775:     diag += dof2;
776:   }
777:   PetscCall(PetscFree2(v_work, v_pivots));

779:   b->ibdiagvalid = PETSC_TRUE;
780:   PetscFunctionReturn(PETSC_SUCCESS);
781: }

783: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
784: {
785:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

787:   PetscFunctionBegin;
788:   *B = kaij->AIJ;
789:   PetscFunctionReturn(PETSC_SUCCESS);
790: }

792: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
793: {
794:   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
795:   Mat            AIJ, OAIJ, B;
796:   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
797:   const PetscInt p = a->p, q = a->q;
798:   PetscBool      ismpikaij, missing;

800:   PetscFunctionBegin;
801:   if (reuse != MAT_REUSE_MATRIX) {
802:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
803:     if (ismpikaij) {
804:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
805:       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
806:       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
807:     } else {
808:       AIJ  = a->AIJ;
809:       OAIJ = NULL;
810:     }
811:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
812:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
813:     PetscCall(MatSetType(B, MATAIJ));
814:     PetscCall(MatGetSize(AIJ, &m, NULL));
815:     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
816:     if (!missing || !a->S) d = m;
817:     PetscCall(PetscMalloc1(m * p, &d_nnz));
818:     for (i = 0; i < m; ++i) {
819:       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820:       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
821:       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
822:     }
823:     if (OAIJ) {
824:       PetscCall(PetscMalloc1(m * p, &o_nnz));
825:       for (i = 0; i < m; ++i) {
826:         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827:         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
828:         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
829:       }
830:       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
831:     } else {
832:       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
833:     }
834:     PetscCall(PetscFree(d_nnz));
835:     PetscCall(PetscFree(o_nnz));
836:   } else B = *newmat;
837:   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
838:   if (reuse == MAT_INPLACE_MATRIX) {
839:     PetscCall(MatHeaderReplace(A, &B));
840:   } else *newmat = B;
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
845: {
846:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
847:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
848:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
849:   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
850:   const PetscScalar *b, *xb, *idiag;
851:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
852:   PetscInt           i, j, k, i2, bs, bs2, nz;

854:   PetscFunctionBegin;
855:   its = its * lits;
856:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
857:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
858:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
859:   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
860:   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
861:   bs  = p;
862:   bs2 = bs * bs;

864:   if (!m) PetscFunctionReturn(PETSC_SUCCESS);

866:   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
867:   idiag = kaij->ibdiag;
868:   diag  = a->diag;

870:   if (!kaij->sor.setup) {
871:     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
872:     kaij->sor.setup = PETSC_TRUE;
873:   }
874:   y    = kaij->sor.y;
875:   w    = kaij->sor.w;
876:   work = kaij->sor.work;
877:   t    = kaij->sor.t;
878:   arr  = kaij->sor.arr;

880:   PetscCall(VecGetArray(xx, &x));
881:   PetscCall(VecGetArrayRead(bb, &b));

883:   if (flag & SOR_ZERO_INITIAL_GUESS) {
884:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
885:       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
886:       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
887:       i2 = bs;
888:       idiag += bs2;
889:       for (i = 1; i < m; i++) {
890:         v  = aa + ai[i];
891:         vi = aj + ai[i];
892:         nz = diag[i] - ai[i];

894:         if (T) { /* b - T (Arow * x) */
895:           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
896:           for (j = 0; j < nz; j++) {
897:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
898:           }
899:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
900:           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
901:         } else if (kaij->isTI) {
902:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
903:           for (j = 0; j < nz; j++) {
904:             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
905:           }
906:         } else {
907:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
908:         }

910:         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
911:         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

913:         idiag += bs2;
914:         i2 += bs;
915:       }
916:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
917:       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
918:       xb = t;
919:     } else xb = b;
920:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
921:       idiag = kaij->ibdiag + bs2 * (m - 1);
922:       i2    = bs * (m - 1);
923:       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
924:       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
925:       i2 -= bs;
926:       idiag -= bs2;
927:       for (i = m - 2; i >= 0; i--) {
928:         v  = aa + diag[i] + 1;
929:         vi = aj + diag[i] + 1;
930:         nz = ai[i + 1] - diag[i] - 1;

932:         if (T) { /* FIXME: This branch untested */
933:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
934:           /* copy all rows of x that are needed into contiguous space */
935:           workt = work;
936:           for (j = 0; j < nz; j++) {
937:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
938:             workt += bs;
939:           }
940:           arrt = arr;
941:           for (j = 0; j < nz; j++) {
942:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
943:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
944:             arrt += bs2;
945:           }
946:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
947:         } else if (kaij->isTI) {
948:           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
949:           for (j = 0; j < nz; j++) {
950:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
951:           }
952:         }

954:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
955:         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

957:         idiag -= bs2;
958:         i2 -= bs;
959:       }
960:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
961:     }
962:     its--;
963:   }
964:   while (its--) { /* FIXME: This branch not updated */
965:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
966:       i2    = 0;
967:       idiag = kaij->ibdiag;
968:       for (i = 0; i < m; i++) {
969:         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

971:         v     = aa + ai[i];
972:         vi    = aj + ai[i];
973:         nz    = diag[i] - ai[i];
974:         workt = work;
975:         for (j = 0; j < nz; j++) {
976:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
977:           workt += bs;
978:         }
979:         arrt = arr;
980:         if (T) {
981:           for (j = 0; j < nz; j++) {
982:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
983:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
984:             arrt += bs2;
985:           }
986:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
987:         } else if (kaij->isTI) {
988:           for (j = 0; j < nz; j++) {
989:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
990:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
991:             arrt += bs2;
992:           }
993:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
994:         }
995:         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));

997:         v     = aa + diag[i] + 1;
998:         vi    = aj + diag[i] + 1;
999:         nz    = ai[i + 1] - diag[i] - 1;
1000:         workt = work;
1001:         for (j = 0; j < nz; j++) {
1002:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1003:           workt += bs;
1004:         }
1005:         arrt = arr;
1006:         if (T) {
1007:           for (j = 0; j < nz; j++) {
1008:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1009:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1010:             arrt += bs2;
1011:           }
1012:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1013:         } else if (kaij->isTI) {
1014:           for (j = 0; j < nz; j++) {
1015:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1016:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1017:             arrt += bs2;
1018:           }
1019:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1020:         }

1022:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1023:         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

1025:         idiag += bs2;
1026:         i2 += bs;
1027:       }
1028:       xb = t;
1029:     } else xb = b;
1030:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1031:       idiag = kaij->ibdiag + bs2 * (m - 1);
1032:       i2    = bs * (m - 1);
1033:       if (xb == b) {
1034:         for (i = m - 1; i >= 0; i--) {
1035:           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

1037:           v     = aa + ai[i];
1038:           vi    = aj + ai[i];
1039:           nz    = diag[i] - ai[i];
1040:           workt = work;
1041:           for (j = 0; j < nz; j++) {
1042:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1043:             workt += bs;
1044:           }
1045:           arrt = arr;
1046:           if (T) {
1047:             for (j = 0; j < nz; j++) {
1048:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1049:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1050:               arrt += bs2;
1051:             }
1052:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1053:           } else if (kaij->isTI) {
1054:             for (j = 0; j < nz; j++) {
1055:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1056:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1057:               arrt += bs2;
1058:             }
1059:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1060:           }

1062:           v     = aa + diag[i] + 1;
1063:           vi    = aj + diag[i] + 1;
1064:           nz    = ai[i + 1] - diag[i] - 1;
1065:           workt = work;
1066:           for (j = 0; j < nz; j++) {
1067:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1068:             workt += bs;
1069:           }
1070:           arrt = arr;
1071:           if (T) {
1072:             for (j = 0; j < nz; j++) {
1073:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1074:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1075:               arrt += bs2;
1076:             }
1077:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1078:           } else if (kaij->isTI) {
1079:             for (j = 0; j < nz; j++) {
1080:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1081:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1082:               arrt += bs2;
1083:             }
1084:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1085:           }

1087:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1088:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1089:         }
1090:       } else {
1091:         for (i = m - 1; i >= 0; i--) {
1092:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1093:           v     = aa + diag[i] + 1;
1094:           vi    = aj + diag[i] + 1;
1095:           nz    = ai[i + 1] - diag[i] - 1;
1096:           workt = work;
1097:           for (j = 0; j < nz; j++) {
1098:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1099:             workt += bs;
1100:           }
1101:           arrt = arr;
1102:           if (T) {
1103:             for (j = 0; j < nz; j++) {
1104:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1105:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1106:               arrt += bs2;
1107:             }
1108:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1109:           } else if (kaij->isTI) {
1110:             for (j = 0; j < nz; j++) {
1111:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1112:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1113:               arrt += bs2;
1114:             }
1115:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1116:           }
1117:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1118:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1119:         }
1120:       }
1121:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1122:     }
1123:   }

1125:   PetscCall(VecRestoreArray(xx, &x));
1126:   PetscCall(VecRestoreArrayRead(bb, &b));
1127:   PetscFunctionReturn(PETSC_SUCCESS);
1128: }

1130: /*===================================================================================*/

1132: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133: {
1134:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1136:   PetscFunctionBegin;
1137:   if (!yy) {
1138:     PetscCall(VecSet(zz, 0.0));
1139:   } else {
1140:     PetscCall(VecCopy(yy, zz));
1141:   }
1142:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1143:   /* start the scatter */
1144:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145:   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1146:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1147:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1148:   PetscFunctionReturn(PETSC_SUCCESS);
1149: }

1151: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1152: {
1153:   PetscFunctionBegin;
1154:   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1155:   PetscFunctionReturn(PETSC_SUCCESS);
1156: }

1158: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1159: {
1160:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1162:   PetscFunctionBegin;
1163:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1164:   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1165:   PetscFunctionReturn(PETSC_SUCCESS);
1166: }

1168: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1169: {
1170:   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1171:   PetscBool    diag = PETSC_FALSE;
1172:   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1173:   PetscScalar *vaij, *v, *S = b->S, *T = b->T;

1175:   PetscFunctionBegin;
1176:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1177:   b->getrowactive = PETSC_TRUE;
1178:   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

1180:   if ((!S) && (!T) && (!b->isTI)) {
1181:     if (ncols) *ncols = 0;
1182:     if (cols) *cols = NULL;
1183:     if (values) *values = NULL;
1184:     PetscFunctionReturn(PETSC_SUCCESS);
1185:   }

1187:   if (T || b->isTI) {
1188:     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1189:     c = nzaij;
1190:     for (i = 0; i < nzaij; i++) {
1191:       /* check if this row contains a diagonal entry */
1192:       if (colsaij[i] == r) {
1193:         diag = PETSC_TRUE;
1194:         c    = i;
1195:       }
1196:     }
1197:   } else nzaij = c = 0;

1199:   /* calculate size of row */
1200:   nz = 0;
1201:   if (S) nz += q;
1202:   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

1204:   if (cols || values) {
1205:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1206:     for (i = 0; i < q; i++) {
1207:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1208:       v[i] = 0.0;
1209:     }
1210:     if (b->isTI) {
1211:       for (i = 0; i < nzaij; i++) {
1212:         for (j = 0; j < q; j++) {
1213:           idx[i * q + j] = colsaij[i] * q + j;
1214:           v[i * q + j]   = (j == s ? vaij[i] : 0);
1215:         }
1216:       }
1217:     } else if (T) {
1218:       for (i = 0; i < nzaij; i++) {
1219:         for (j = 0; j < q; j++) {
1220:           idx[i * q + j] = colsaij[i] * q + j;
1221:           v[i * q + j]   = vaij[i] * T[s + j * p];
1222:         }
1223:       }
1224:     }
1225:     if (S) {
1226:       for (j = 0; j < q; j++) {
1227:         idx[c * q + j] = r * q + j;
1228:         v[c * q + j] += S[s + j * p];
1229:       }
1230:     }
1231:   }

1233:   if (ncols) *ncols = nz;
1234:   if (cols) *cols = idx;
1235:   if (values) *values = v;
1236:   PetscFunctionReturn(PETSC_SUCCESS);
1237: }

1239: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1240: {
1241:   PetscFunctionBegin;
1242:   PetscCall(PetscFree2(*idx, *v));
1243:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1244:   PetscFunctionReturn(PETSC_SUCCESS);
1245: }

1247: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1248: {
1249:   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1250:   Mat            AIJ  = b->A;
1251:   PetscBool      diag = PETSC_FALSE;
1252:   Mat            MatAIJ, MatOAIJ;
1253:   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1254:   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1255:   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

1257:   PetscFunctionBegin;
1258:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1259:   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1260:   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1261:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1262:   b->getrowactive = PETSC_TRUE;
1263:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1264:   lrow = row - rstart;

1266:   if ((!S) && (!T) && (!b->isTI)) {
1267:     if (ncols) *ncols = 0;
1268:     if (cols) *cols = NULL;
1269:     if (values) *values = NULL;
1270:     PetscFunctionReturn(PETSC_SUCCESS);
1271:   }

1273:   r = lrow / p;
1274:   s = lrow % p;

1276:   if (T || b->isTI) {
1277:     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1278:     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1279:     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

1281:     c = ncolsaij + ncolsoaij;
1282:     for (i = 0; i < ncolsaij; i++) {
1283:       /* check if this row contains a diagonal entry */
1284:       if (colsaij[i] == r) {
1285:         diag = PETSC_TRUE;
1286:         c    = i;
1287:       }
1288:     }
1289:   } else c = 0;

1291:   /* calculate size of row */
1292:   nz = 0;
1293:   if (S) nz += q;
1294:   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

1296:   if (cols || values) {
1297:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1298:     for (i = 0; i < q; i++) {
1299:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1300:       v[i] = 0.0;
1301:     }
1302:     if (b->isTI) {
1303:       for (i = 0; i < ncolsaij; i++) {
1304:         for (j = 0; j < q; j++) {
1305:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1306:           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1307:         }
1308:       }
1309:       for (i = 0; i < ncolsoaij; i++) {
1310:         for (j = 0; j < q; j++) {
1311:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1312:           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1313:         }
1314:       }
1315:     } else if (T) {
1316:       for (i = 0; i < ncolsaij; i++) {
1317:         for (j = 0; j < q; j++) {
1318:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1319:           v[i * q + j]   = vals[i] * T[s + j * p];
1320:         }
1321:       }
1322:       for (i = 0; i < ncolsoaij; i++) {
1323:         for (j = 0; j < q; j++) {
1324:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1325:           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1326:         }
1327:       }
1328:     }
1329:     if (S) {
1330:       for (j = 0; j < q; j++) {
1331:         idx[c * q + j] = (r + rstart / p) * q + j;
1332:         v[c * q + j] += S[s + j * p];
1333:       }
1334:     }
1335:   }

1337:   if (ncols) *ncols = nz;
1338:   if (cols) *cols = idx;
1339:   if (values) *values = v;
1340:   PetscFunctionReturn(PETSC_SUCCESS);
1341: }

1343: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1344: {
1345:   PetscFunctionBegin;
1346:   PetscCall(PetscFree2(*idx, *v));
1347:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1348:   PetscFunctionReturn(PETSC_SUCCESS);
1349: }

1351: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1352: {
1353:   Mat A;

1355:   PetscFunctionBegin;
1356:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1357:   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1358:   PetscCall(MatDestroy(&A));
1359:   PetscFunctionReturn(PETSC_SUCCESS);
1360: }

1362: /*@C
1363:   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.

1365:   Collective

1367:   Input Parameters:
1368: + A - the `MATAIJ` matrix
1369: . p - number of rows in `S` and `T`
1370: . q - number of columns in `S` and `T`
1371: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1372: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)

1374:   Output Parameter:
1375: . kaij - the new `MATKAIJ` matrix

1377:   Level: advanced

1379:   Notes:
1380:   The created matrix is of the following form\:
1381: .vb
1382:     [I \otimes S + A \otimes T]
1383: .ve
1384:   where
1385: .vb
1386:   S is a dense (p \times q) matrix
1387:   T is a dense (p \times q) matrix
1388:   A is a `MATAIJ`  (n \times n) matrix
1389:   I is the identity matrix
1390: .ve
1391:   The resulting matrix is (np \times nq)

1393:   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1394:   column-major format.

1396:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

1398:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

1400:   Developer Notes:
1401:   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1402:   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1403:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1404:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1406: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1407: @*/
1408: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1409: {
1410:   PetscFunctionBegin;
1411:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1412:   PetscCall(MatSetType(*kaij, MATKAIJ));
1413:   PetscCall(MatKAIJSetAIJ(*kaij, A));
1414:   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1415:   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1416:   PetscCall(MatSetUp(*kaij));
1417:   PetscFunctionReturn(PETSC_SUCCESS);
1418: }

1420: /*MC
1421:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1422:     [I \otimes S + A \otimes T],
1423:   where
1424: .vb
1425:     S is a dense (p \times q) matrix,
1426:     T is a dense (p \times q) matrix,
1427:     A is an AIJ  (n \times n) matrix,
1428:     and I is the identity matrix.
1429: .ve
1430:   The resulting matrix is (np \times nq).

1432:   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1434:   Level: advanced

1436:   Note:
1437:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1438:   where x and b are column vectors containing the row-major representations of X and B.

1440: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1441: M*/

1443: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1444: {
1445:   Mat_MPIKAIJ *b;
1446:   PetscMPIInt  size;

1448:   PetscFunctionBegin;
1449:   PetscCall(PetscNew(&b));
1450:   A->data = (void *)b;

1452:   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

1454:   b->w = NULL;
1455:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1456:   if (size == 1) {
1457:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1458:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1459:     A->ops->mult                = MatMult_SeqKAIJ;
1460:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1461:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1462:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1463:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1464:     A->ops->sor                 = MatSOR_SeqKAIJ;
1465:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1466:   } else {
1467:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1468:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1469:     A->ops->mult                = MatMult_MPIKAIJ;
1470:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1471:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1472:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1473:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1474:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1475:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1476:   }
1477:   A->ops->setup           = MatSetUp_KAIJ;
1478:   A->ops->view            = MatView_KAIJ;
1479:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1480:   PetscFunctionReturn(PETSC_SUCCESS);
1481: }