Actual source code: kaij.c
1: /*
2: Defines the basic matrix operations for the KAIJ matrix storage format.
3: This format is used to evaluate matrices of the form:
5: [I \otimes S + A \otimes T]
7: where
8: S is a dense (p \times q) matrix
9: T is a dense (p \times q) matrix
10: A is an AIJ (n \times n) matrix
11: I is the identity matrix
13: The resulting matrix is (np \times nq)
15: We provide:
16: MatMult()
17: MatMultAdd()
18: MatInvertBlockDiagonal()
19: and
20: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22: This single directory handles both the sequential and parallel codes
23: */
25: #include <../src/mat/impls/kaij/kaij.h>
26: #include <../src/mat/utils/freespace.h>
27: #include <petsc/private/vecimpl.h>
29: /*@
30: MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32: Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34: Input Parameter:
35: . A - the `MATKAIJ` matrix
37: Output Parameter:
38: . B - the `MATAIJ` matrix
40: Level: advanced
42: Note:
43: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46: @*/
47: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48: {
49: PetscBool ismpikaij, isseqkaij;
51: PetscFunctionBegin;
52: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54: if (ismpikaij) {
55: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
57: *B = b->A;
58: } else if (isseqkaij) {
59: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
61: *B = b->AIJ;
62: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63: PetscFunctionReturn(PETSC_SUCCESS);
64: }
66: /*@C
67: MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
69: Not Collective; the entire `S` is stored and returned independently on all processes.
71: Input Parameter:
72: . A - the `MATKAIJ` matrix
74: Output Parameters:
75: + m - the number of rows in `S`
76: . n - the number of columns in `S`
77: - S - the S matrix, in form of a scalar array in column-major format
79: Level: advanced
81: Note:
82: All output parameters are optional (pass `NULL` if not desired)
84: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85: @*/
86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[])
87: {
88: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
90: PetscFunctionBegin;
91: if (m) *m = b->p;
92: if (n) *n = b->q;
93: if (S) *S = b->S;
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: /*@C
98: MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
100: Not Collective; the entire `S` is stored and returned independently on all processes.
102: Input Parameter:
103: . A - the `MATKAIJ` matrix
105: Output Parameters:
106: + m - the number of rows in `S`
107: . n - the number of columns in `S`
108: - S - the S matrix, in form of a scalar array in column-major format
110: Level: advanced
112: Note:
113: All output parameters are optional (pass `NULL` if not desired)
115: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116: @*/
117: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[])
118: {
119: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
121: PetscFunctionBegin;
122: if (m) *m = b->p;
123: if (n) *n = b->q;
124: if (S) *S = b->S;
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: /*@C
129: MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
131: Not Collective
133: Input Parameters:
134: + A - the `MATKAIJ` matrix
135: - S - location of pointer to array obtained with `MatKAIJGetS()`
137: Level: advanced
139: Note:
140: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
141: If `NULL` is passed, it will not attempt to zero the array pointer.
143: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
144: @*/
145: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[])
146: {
147: PetscFunctionBegin;
148: if (S) *S = NULL;
149: PetscCall(PetscObjectStateIncrease((PetscObject)A));
150: PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: /*@C
154: MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
156: Not Collective
158: Input Parameters:
159: + A - the `MATKAIJ` matrix
160: - S - location of pointer to array obtained with `MatKAIJGetS()`
162: Level: advanced
164: Note:
165: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
166: If `NULL` is passed, it will not attempt to zero the array pointer.
168: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
169: @*/
170: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[])
171: {
172: PetscFunctionBegin;
173: if (S) *S = NULL;
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: /*@C
178: MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
180: Not Collective; the entire `T` is stored and returned independently on all processes
182: Input Parameter:
183: . A - the `MATKAIJ` matrix
185: Output Parameters:
186: + m - the number of rows in `T`
187: . n - the number of columns in `T`
188: - T - the T matrix, in form of a scalar array in column-major format
190: Level: advanced
192: Note:
193: All output parameters are optional (pass `NULL` if not desired)
195: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
196: @*/
197: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[])
198: {
199: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
201: PetscFunctionBegin;
202: if (m) *m = b->p;
203: if (n) *n = b->q;
204: if (T) *T = b->T;
205: PetscFunctionReturn(PETSC_SUCCESS);
206: }
208: /*@C
209: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
211: Not Collective; the entire `T` is stored and returned independently on all processes
213: Input Parameter:
214: . A - the `MATKAIJ` matrix
216: Output Parameters:
217: + m - the number of rows in `T`
218: . n - the number of columns in `T`
219: - T - the T matrix, in form of a scalar array in column-major format
221: Level: advanced
223: Note:
224: All output parameters are optional (pass `NULL` if not desired)
226: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
227: @*/
228: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[])
229: {
230: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
232: PetscFunctionBegin;
233: if (m) *m = b->p;
234: if (n) *n = b->q;
235: if (T) *T = b->T;
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: /*@C
240: MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
242: Not Collective
244: Input Parameters:
245: + A - the `MATKAIJ` matrix
246: - T - location of pointer to array obtained with `MatKAIJGetS()`
248: Level: advanced
250: Note:
251: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
252: If `NULL` is passed, it will not attempt to zero the array pointer.
254: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
255: @*/
256: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[])
257: {
258: PetscFunctionBegin;
259: if (T) *T = NULL;
260: PetscCall(PetscObjectStateIncrease((PetscObject)A));
261: PetscFunctionReturn(PETSC_SUCCESS);
262: }
264: /*@C
265: MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
267: Not Collective
269: Input Parameters:
270: + A - the `MATKAIJ` matrix
271: - T - location of pointer to array obtained with `MatKAIJGetS()`
273: Level: advanced
275: Note:
276: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
277: If `NULL` is passed, it will not attempt to zero the array pointer.
279: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
280: @*/
281: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[])
282: {
283: PetscFunctionBegin;
284: if (T) *T = NULL;
285: PetscFunctionReturn(PETSC_SUCCESS);
286: }
288: /*@
289: MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
291: Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
293: Input Parameters:
294: + A - the `MATKAIJ` matrix
295: - B - the `MATAIJ` matrix
297: Level: advanced
299: Notes:
300: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
302: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
304: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
305: @*/
306: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
307: {
308: PetscMPIInt size;
309: PetscBool flg;
311: PetscFunctionBegin;
312: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
313: if (size == 1) {
314: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
315: PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
316: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
317: a->AIJ = B;
318: } else {
319: Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
320: a->A = B;
321: }
322: PetscCall(PetscObjectReference((PetscObject)B));
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: /*@
327: MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
329: Logically Collective; the entire `S` is stored independently on all processes.
331: Input Parameters:
332: + A - the `MATKAIJ` matrix
333: . p - the number of rows in `S`
334: . q - the number of columns in `S`
335: - S - the S matrix, in form of a scalar array in column-major format
337: Level: advanced
339: Notes:
340: The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
342: The `S` matrix is copied, so the user can destroy this array.
344: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
345: @*/
346: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
347: {
348: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
350: PetscFunctionBegin;
351: PetscCall(PetscFree(a->S));
352: if (S) {
353: PetscCall(PetscMalloc1(p * q, &a->S));
354: PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
355: } else a->S = NULL;
357: a->p = p;
358: a->q = q;
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
362: /*@
363: MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
365: Logically Collective.
367: Input Parameter:
368: . A - the `MATKAIJ` matrix
370: Output Parameter:
371: . identity - the Boolean value
373: Level: advanced
375: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
376: @*/
377: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
378: {
379: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
380: PetscInt i, j;
382: PetscFunctionBegin;
383: if (a->p != a->q) {
384: *identity = PETSC_FALSE;
385: PetscFunctionReturn(PETSC_SUCCESS);
386: } else *identity = PETSC_TRUE;
387: if (!a->isTI || a->S) {
388: for (i = 0; i < a->p && *identity; i++) {
389: for (j = 0; j < a->p && *identity; j++) {
390: if (i != j) {
391: if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
392: if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
393: } else {
394: if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
395: if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
396: }
397: }
398: }
399: }
400: PetscFunctionReturn(PETSC_SUCCESS);
401: }
403: /*@
404: MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
406: Logically Collective; the entire `T` is stored independently on all processes.
408: Input Parameters:
409: + A - the `MATKAIJ` matrix
410: . p - the number of rows in `S`
411: . q - the number of columns in `S`
412: - T - the `T` matrix, in form of a scalar array in column-major format
414: Level: advanced
416: Notes:
417: The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
419: The `T` matrix is copied, so the user can destroy this array.
421: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
422: @*/
423: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
424: {
425: PetscInt i, j;
426: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
427: PetscBool isTI = PETSC_FALSE;
429: PetscFunctionBegin;
430: /* check if T is an identity matrix */
431: if (T && (p == q)) {
432: isTI = PETSC_TRUE;
433: for (i = 0; i < p; i++) {
434: for (j = 0; j < q; j++) {
435: if (i == j) {
436: /* diagonal term must be 1 */
437: if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
438: } else {
439: /* off-diagonal term must be 0 */
440: if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
441: }
442: }
443: }
444: }
445: a->isTI = isTI;
447: PetscCall(PetscFree(a->T));
448: if (T && (!isTI)) {
449: PetscCall(PetscMalloc1(p * q, &a->T));
450: PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
451: } else a->T = NULL;
453: a->p = p;
454: a->q = q;
455: PetscFunctionReturn(PETSC_SUCCESS);
456: }
458: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
459: {
460: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
462: PetscFunctionBegin;
463: PetscCall(MatDestroy(&b->AIJ));
464: PetscCall(PetscFree(b->S));
465: PetscCall(PetscFree(b->T));
466: PetscCall(PetscFree(b->ibdiag));
467: PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
468: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
469: PetscCall(PetscFree(A->data));
470: PetscFunctionReturn(PETSC_SUCCESS);
471: }
473: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
474: {
475: Mat_MPIKAIJ *a;
476: Mat_MPIAIJ *mpiaij;
477: PetscScalar *T;
478: PetscInt i, j;
479: PetscObjectState state;
481: PetscFunctionBegin;
482: a = (Mat_MPIKAIJ *)A->data;
483: mpiaij = (Mat_MPIAIJ *)a->A->data;
485: PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
486: if (state == a->state) {
487: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
488: PetscFunctionReturn(PETSC_SUCCESS);
489: } else {
490: PetscCall(MatDestroy(&a->AIJ));
491: PetscCall(MatDestroy(&a->OAIJ));
492: if (a->isTI) {
493: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
494: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
495: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
496: * to pass in. */
497: PetscCall(PetscMalloc1(a->p * a->q, &T));
498: for (i = 0; i < a->p; i++) {
499: for (j = 0; j < a->q; j++) {
500: if (i == j) T[i + j * a->p] = 1.0;
501: else T[i + j * a->p] = 0.0;
502: }
503: }
504: } else T = a->T;
505: PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
506: PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
507: if (a->isTI) PetscCall(PetscFree(T));
508: a->state = state;
509: }
510: PetscFunctionReturn(PETSC_SUCCESS);
511: }
513: static PetscErrorCode MatSetUp_KAIJ(Mat A)
514: {
515: PetscInt n;
516: PetscMPIInt size;
517: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
519: PetscFunctionBegin;
520: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
521: if (size == 1) {
522: PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
523: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
524: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
525: PetscCall(PetscLayoutSetUp(A->rmap));
526: PetscCall(PetscLayoutSetUp(A->cmap));
527: } else {
528: Mat_MPIKAIJ *a;
529: Mat_MPIAIJ *mpiaij;
530: IS from, to;
531: Vec gvec;
533: a = (Mat_MPIKAIJ *)A->data;
534: mpiaij = (Mat_MPIAIJ *)a->A->data;
535: PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
536: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
537: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
538: PetscCall(PetscLayoutSetUp(A->rmap));
539: PetscCall(PetscLayoutSetUp(A->cmap));
541: PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
543: PetscCall(VecGetSize(mpiaij->lvec, &n));
544: PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
545: PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
546: PetscCall(VecSetBlockSize(a->w, a->q));
547: PetscCall(VecSetType(a->w, VECSEQ));
549: /* create two temporary Index sets for build scatter gather */
550: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
551: PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
553: /* create temporary global vector to generate scatter context */
554: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
556: /* generate the scatter context */
557: PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
559: PetscCall(ISDestroy(&from));
560: PetscCall(ISDestroy(&to));
561: PetscCall(VecDestroy(&gvec));
562: }
564: A->assembled = PETSC_TRUE;
565: PetscFunctionReturn(PETSC_SUCCESS);
566: }
568: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
569: {
570: PetscViewerFormat format;
571: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
572: Mat B;
573: PetscInt i;
574: PetscBool ismpikaij;
576: PetscFunctionBegin;
577: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
578: PetscCall(PetscViewerGetFormat(viewer, &format));
579: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
580: PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
582: /* Print appropriate details for S. */
583: if (!a->S) {
584: PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
585: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
586: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
587: for (i = 0; i < (a->p * a->q); i++) {
588: #if defined(PETSC_USE_COMPLEX)
589: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
590: #else
591: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
592: #endif
593: }
594: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
595: }
597: /* Print appropriate details for T. */
598: if (a->isTI) {
599: PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
600: } else if (!a->T) {
601: PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
602: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
603: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
604: for (i = 0; i < (a->p * a->q); i++) {
605: #if defined(PETSC_USE_COMPLEX)
606: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
607: #else
608: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
609: #endif
610: }
611: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
612: }
614: /* Now print details for the AIJ matrix, using the AIJ viewer. */
615: PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
616: if (ismpikaij) {
617: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
618: PetscCall(MatView(b->A, viewer));
619: } else {
620: PetscCall(MatView(a->AIJ, viewer));
621: }
623: } else {
624: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
625: PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
626: PetscCall(MatView(B, viewer));
627: PetscCall(MatDestroy(&B));
628: }
629: PetscFunctionReturn(PETSC_SUCCESS);
630: }
632: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
633: {
634: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
636: PetscFunctionBegin;
637: PetscCall(MatDestroy(&b->AIJ));
638: PetscCall(MatDestroy(&b->OAIJ));
639: PetscCall(MatDestroy(&b->A));
640: PetscCall(VecScatterDestroy(&b->ctx));
641: PetscCall(VecDestroy(&b->w));
642: PetscCall(PetscFree(b->S));
643: PetscCall(PetscFree(b->T));
644: PetscCall(PetscFree(b->ibdiag));
645: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
646: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
647: PetscCall(PetscFree(A->data));
648: PetscFunctionReturn(PETSC_SUCCESS);
649: }
651: /* zz = yy + Axx */
652: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
653: {
654: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
655: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
656: const PetscScalar *s = b->S, *t = b->T;
657: const PetscScalar *x, *v, *bx;
658: PetscScalar *y, *sums;
659: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
660: PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k;
662: PetscFunctionBegin;
663: if (!yy) {
664: PetscCall(VecSet(zz, 0.0));
665: } else {
666: PetscCall(VecCopy(yy, zz));
667: }
668: if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
670: PetscCall(VecGetArrayRead(xx, &x));
671: PetscCall(VecGetArray(zz, &y));
672: idx = a->j;
673: v = a->a;
674: ii = a->i;
676: if (b->isTI) {
677: for (i = 0; i < m; i++) {
678: jrow = ii[i];
679: n = ii[i + 1] - jrow;
680: sums = y + p * i;
681: for (j = 0; j < n; j++) {
682: for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
683: }
684: }
685: PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
686: } else if (t) {
687: for (i = 0; i < m; i++) {
688: jrow = ii[i];
689: n = ii[i + 1] - jrow;
690: sums = y + p * i;
691: for (j = 0; j < n; j++) {
692: for (k = 0; k < p; k++) {
693: for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
694: }
695: }
696: }
697: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
698: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
699: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
700: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
701: PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
702: }
703: if (s) {
704: for (i = 0; i < m; i++) {
705: sums = y + p * i;
706: bx = x + q * i;
707: if (i < b->AIJ->cmap->n) {
708: for (j = 0; j < q; j++) {
709: for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
710: }
711: }
712: }
713: PetscCall(PetscLogFlops(2.0 * m * p * q));
714: }
716: PetscCall(VecRestoreArrayRead(xx, &x));
717: PetscCall(VecRestoreArray(zz, &y));
718: PetscFunctionReturn(PETSC_SUCCESS);
719: }
721: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
722: {
723: PetscFunctionBegin;
724: PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
725: PetscFunctionReturn(PETSC_SUCCESS);
726: }
728: #include <petsc/private/kernels/blockinvert.h>
730: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
731: {
732: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
733: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
734: const PetscScalar *S = b->S;
735: const PetscScalar *T = b->T;
736: const PetscScalar *v = a->a;
737: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
738: PetscInt i, j, *v_pivots, dof, dof2;
739: PetscScalar *diag, aval, *v_work;
741: PetscFunctionBegin;
742: PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
743: PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
745: dof = p;
746: dof2 = dof * dof;
748: if (b->ibdiagvalid) {
749: if (values) *values = b->ibdiag;
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }
752: if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
753: if (values) *values = b->ibdiag;
754: diag = b->ibdiag;
756: PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
757: for (i = 0; i < m; i++) {
758: if (S) {
759: PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
760: } else {
761: PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
762: }
763: if (b->isTI) {
764: aval = 0;
765: for (j = ii[i]; j < ii[i + 1]; j++)
766: if (idx[j] == i) aval = v[j];
767: for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
768: } else if (T) {
769: aval = 0;
770: for (j = ii[i]; j < ii[i + 1]; j++)
771: if (idx[j] == i) aval = v[j];
772: for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
773: }
774: PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
775: diag += dof2;
776: }
777: PetscCall(PetscFree2(v_work, v_pivots));
779: b->ibdiagvalid = PETSC_TRUE;
780: PetscFunctionReturn(PETSC_SUCCESS);
781: }
783: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
784: {
785: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
787: PetscFunctionBegin;
788: *B = kaij->AIJ;
789: PetscFunctionReturn(PETSC_SUCCESS);
790: }
792: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
793: {
794: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
795: Mat AIJ, OAIJ, B;
796: PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
797: const PetscInt p = a->p, q = a->q;
798: PetscBool ismpikaij, missing;
800: PetscFunctionBegin;
801: if (reuse != MAT_REUSE_MATRIX) {
802: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
803: if (ismpikaij) {
804: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
805: AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
806: OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
807: } else {
808: AIJ = a->AIJ;
809: OAIJ = NULL;
810: }
811: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
812: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
813: PetscCall(MatSetType(B, MATAIJ));
814: PetscCall(MatGetSize(AIJ, &m, NULL));
815: PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
816: if (!missing || !a->S) d = m;
817: PetscCall(PetscMalloc1(m * p, &d_nnz));
818: for (i = 0; i < m; ++i) {
819: PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820: for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
821: PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
822: }
823: if (OAIJ) {
824: PetscCall(PetscMalloc1(m * p, &o_nnz));
825: for (i = 0; i < m; ++i) {
826: PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827: for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
828: PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
829: }
830: PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
831: } else {
832: PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
833: }
834: PetscCall(PetscFree(d_nnz));
835: PetscCall(PetscFree(o_nnz));
836: } else B = *newmat;
837: PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
838: if (reuse == MAT_INPLACE_MATRIX) {
839: PetscCall(MatHeaderReplace(A, &B));
840: } else *newmat = B;
841: PetscFunctionReturn(PETSC_SUCCESS);
842: }
844: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
845: {
846: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data;
847: Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data;
848: const PetscScalar *aa = a->a, *T = kaij->T, *v;
849: const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
850: const PetscScalar *b, *xb, *idiag;
851: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
852: PetscInt i, j, k, i2, bs, bs2, nz;
854: PetscFunctionBegin;
855: its = its * lits;
856: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
857: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
858: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
859: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
860: PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
861: bs = p;
862: bs2 = bs * bs;
864: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
866: if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
867: idiag = kaij->ibdiag;
868: diag = a->diag;
870: if (!kaij->sor.setup) {
871: PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
872: kaij->sor.setup = PETSC_TRUE;
873: }
874: y = kaij->sor.y;
875: w = kaij->sor.w;
876: work = kaij->sor.work;
877: t = kaij->sor.t;
878: arr = kaij->sor.arr;
880: PetscCall(VecGetArray(xx, &x));
881: PetscCall(VecGetArrayRead(bb, &b));
883: if (flag & SOR_ZERO_INITIAL_GUESS) {
884: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
885: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
886: PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
887: i2 = bs;
888: idiag += bs2;
889: for (i = 1; i < m; i++) {
890: v = aa + ai[i];
891: vi = aj + ai[i];
892: nz = diag[i] - ai[i];
894: if (T) { /* b - T (Arow * x) */
895: PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
896: for (j = 0; j < nz; j++) {
897: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
898: }
899: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
900: for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
901: } else if (kaij->isTI) {
902: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
903: for (j = 0; j < nz; j++) {
904: for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
905: }
906: } else {
907: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
908: }
910: PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
911: for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
913: idiag += bs2;
914: i2 += bs;
915: }
916: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
917: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
918: xb = t;
919: } else xb = b;
920: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
921: idiag = kaij->ibdiag + bs2 * (m - 1);
922: i2 = bs * (m - 1);
923: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
924: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
925: i2 -= bs;
926: idiag -= bs2;
927: for (i = m - 2; i >= 0; i--) {
928: v = aa + diag[i] + 1;
929: vi = aj + diag[i] + 1;
930: nz = ai[i + 1] - diag[i] - 1;
932: if (T) { /* FIXME: This branch untested */
933: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
934: /* copy all rows of x that are needed into contiguous space */
935: workt = work;
936: for (j = 0; j < nz; j++) {
937: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
938: workt += bs;
939: }
940: arrt = arr;
941: for (j = 0; j < nz; j++) {
942: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
943: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
944: arrt += bs2;
945: }
946: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
947: } else if (kaij->isTI) {
948: PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
949: for (j = 0; j < nz; j++) {
950: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
951: }
952: }
954: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
955: for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
957: idiag -= bs2;
958: i2 -= bs;
959: }
960: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
961: }
962: its--;
963: }
964: while (its--) { /* FIXME: This branch not updated */
965: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
966: i2 = 0;
967: idiag = kaij->ibdiag;
968: for (i = 0; i < m; i++) {
969: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
971: v = aa + ai[i];
972: vi = aj + ai[i];
973: nz = diag[i] - ai[i];
974: workt = work;
975: for (j = 0; j < nz; j++) {
976: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
977: workt += bs;
978: }
979: arrt = arr;
980: if (T) {
981: for (j = 0; j < nz; j++) {
982: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
983: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
984: arrt += bs2;
985: }
986: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
987: } else if (kaij->isTI) {
988: for (j = 0; j < nz; j++) {
989: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
990: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
991: arrt += bs2;
992: }
993: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
994: }
995: PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
997: v = aa + diag[i] + 1;
998: vi = aj + diag[i] + 1;
999: nz = ai[i + 1] - diag[i] - 1;
1000: workt = work;
1001: for (j = 0; j < nz; j++) {
1002: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1003: workt += bs;
1004: }
1005: arrt = arr;
1006: if (T) {
1007: for (j = 0; j < nz; j++) {
1008: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1009: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1010: arrt += bs2;
1011: }
1012: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1013: } else if (kaij->isTI) {
1014: for (j = 0; j < nz; j++) {
1015: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1016: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1017: arrt += bs2;
1018: }
1019: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1020: }
1022: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1023: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1025: idiag += bs2;
1026: i2 += bs;
1027: }
1028: xb = t;
1029: } else xb = b;
1030: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1031: idiag = kaij->ibdiag + bs2 * (m - 1);
1032: i2 = bs * (m - 1);
1033: if (xb == b) {
1034: for (i = m - 1; i >= 0; i--) {
1035: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1037: v = aa + ai[i];
1038: vi = aj + ai[i];
1039: nz = diag[i] - ai[i];
1040: workt = work;
1041: for (j = 0; j < nz; j++) {
1042: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1043: workt += bs;
1044: }
1045: arrt = arr;
1046: if (T) {
1047: for (j = 0; j < nz; j++) {
1048: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1049: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1050: arrt += bs2;
1051: }
1052: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1053: } else if (kaij->isTI) {
1054: for (j = 0; j < nz; j++) {
1055: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1056: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1057: arrt += bs2;
1058: }
1059: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1060: }
1062: v = aa + diag[i] + 1;
1063: vi = aj + diag[i] + 1;
1064: nz = ai[i + 1] - diag[i] - 1;
1065: workt = work;
1066: for (j = 0; j < nz; j++) {
1067: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1068: workt += bs;
1069: }
1070: arrt = arr;
1071: if (T) {
1072: for (j = 0; j < nz; j++) {
1073: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1074: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1075: arrt += bs2;
1076: }
1077: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1078: } else if (kaij->isTI) {
1079: for (j = 0; j < nz; j++) {
1080: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1081: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1082: arrt += bs2;
1083: }
1084: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1085: }
1087: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1088: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1089: }
1090: } else {
1091: for (i = m - 1; i >= 0; i--) {
1092: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1093: v = aa + diag[i] + 1;
1094: vi = aj + diag[i] + 1;
1095: nz = ai[i + 1] - diag[i] - 1;
1096: workt = work;
1097: for (j = 0; j < nz; j++) {
1098: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1099: workt += bs;
1100: }
1101: arrt = arr;
1102: if (T) {
1103: for (j = 0; j < nz; j++) {
1104: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1105: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1106: arrt += bs2;
1107: }
1108: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1109: } else if (kaij->isTI) {
1110: for (j = 0; j < nz; j++) {
1111: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1112: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1113: arrt += bs2;
1114: }
1115: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1116: }
1117: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1118: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1119: }
1120: }
1121: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1122: }
1123: }
1125: PetscCall(VecRestoreArray(xx, &x));
1126: PetscCall(VecRestoreArrayRead(bb, &b));
1127: PetscFunctionReturn(PETSC_SUCCESS);
1128: }
1130: /*===================================================================================*/
1132: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133: {
1134: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1136: PetscFunctionBegin;
1137: if (!yy) {
1138: PetscCall(VecSet(zz, 0.0));
1139: } else {
1140: PetscCall(VecCopy(yy, zz));
1141: }
1142: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1143: /* start the scatter */
1144: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145: PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1146: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1147: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1148: PetscFunctionReturn(PETSC_SUCCESS);
1149: }
1151: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1152: {
1153: PetscFunctionBegin;
1154: PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1155: PetscFunctionReturn(PETSC_SUCCESS);
1156: }
1158: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1159: {
1160: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1162: PetscFunctionBegin;
1163: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1164: PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1165: PetscFunctionReturn(PETSC_SUCCESS);
1166: }
1168: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1169: {
1170: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
1171: PetscBool diag = PETSC_FALSE;
1172: PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1173: PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1175: PetscFunctionBegin;
1176: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1177: b->getrowactive = PETSC_TRUE;
1178: PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1180: if ((!S) && (!T) && (!b->isTI)) {
1181: if (ncols) *ncols = 0;
1182: if (cols) *cols = NULL;
1183: if (values) *values = NULL;
1184: PetscFunctionReturn(PETSC_SUCCESS);
1185: }
1187: if (T || b->isTI) {
1188: PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1189: c = nzaij;
1190: for (i = 0; i < nzaij; i++) {
1191: /* check if this row contains a diagonal entry */
1192: if (colsaij[i] == r) {
1193: diag = PETSC_TRUE;
1194: c = i;
1195: }
1196: }
1197: } else nzaij = c = 0;
1199: /* calculate size of row */
1200: nz = 0;
1201: if (S) nz += q;
1202: if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1204: if (cols || values) {
1205: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1206: for (i = 0; i < q; i++) {
1207: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1208: v[i] = 0.0;
1209: }
1210: if (b->isTI) {
1211: for (i = 0; i < nzaij; i++) {
1212: for (j = 0; j < q; j++) {
1213: idx[i * q + j] = colsaij[i] * q + j;
1214: v[i * q + j] = (j == s ? vaij[i] : 0);
1215: }
1216: }
1217: } else if (T) {
1218: for (i = 0; i < nzaij; i++) {
1219: for (j = 0; j < q; j++) {
1220: idx[i * q + j] = colsaij[i] * q + j;
1221: v[i * q + j] = vaij[i] * T[s + j * p];
1222: }
1223: }
1224: }
1225: if (S) {
1226: for (j = 0; j < q; j++) {
1227: idx[c * q + j] = r * q + j;
1228: v[c * q + j] += S[s + j * p];
1229: }
1230: }
1231: }
1233: if (ncols) *ncols = nz;
1234: if (cols) *cols = idx;
1235: if (values) *values = v;
1236: PetscFunctionReturn(PETSC_SUCCESS);
1237: }
1239: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1240: {
1241: PetscFunctionBegin;
1242: PetscCall(PetscFree2(*idx, *v));
1243: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1244: PetscFunctionReturn(PETSC_SUCCESS);
1245: }
1247: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1248: {
1249: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1250: Mat AIJ = b->A;
1251: PetscBool diag = PETSC_FALSE;
1252: Mat MatAIJ, MatOAIJ;
1253: const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1254: PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1255: PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T;
1257: PetscFunctionBegin;
1258: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1259: MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1260: MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1261: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1262: b->getrowactive = PETSC_TRUE;
1263: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1264: lrow = row - rstart;
1266: if ((!S) && (!T) && (!b->isTI)) {
1267: if (ncols) *ncols = 0;
1268: if (cols) *cols = NULL;
1269: if (values) *values = NULL;
1270: PetscFunctionReturn(PETSC_SUCCESS);
1271: }
1273: r = lrow / p;
1274: s = lrow % p;
1276: if (T || b->isTI) {
1277: PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1278: PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1279: PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1281: c = ncolsaij + ncolsoaij;
1282: for (i = 0; i < ncolsaij; i++) {
1283: /* check if this row contains a diagonal entry */
1284: if (colsaij[i] == r) {
1285: diag = PETSC_TRUE;
1286: c = i;
1287: }
1288: }
1289: } else c = 0;
1291: /* calculate size of row */
1292: nz = 0;
1293: if (S) nz += q;
1294: if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1296: if (cols || values) {
1297: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1298: for (i = 0; i < q; i++) {
1299: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1300: v[i] = 0.0;
1301: }
1302: if (b->isTI) {
1303: for (i = 0; i < ncolsaij; i++) {
1304: for (j = 0; j < q; j++) {
1305: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1306: v[i * q + j] = (j == s ? vals[i] : 0.0);
1307: }
1308: }
1309: for (i = 0; i < ncolsoaij; i++) {
1310: for (j = 0; j < q; j++) {
1311: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1312: v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0);
1313: }
1314: }
1315: } else if (T) {
1316: for (i = 0; i < ncolsaij; i++) {
1317: for (j = 0; j < q; j++) {
1318: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1319: v[i * q + j] = vals[i] * T[s + j * p];
1320: }
1321: }
1322: for (i = 0; i < ncolsoaij; i++) {
1323: for (j = 0; j < q; j++) {
1324: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1325: v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p];
1326: }
1327: }
1328: }
1329: if (S) {
1330: for (j = 0; j < q; j++) {
1331: idx[c * q + j] = (r + rstart / p) * q + j;
1332: v[c * q + j] += S[s + j * p];
1333: }
1334: }
1335: }
1337: if (ncols) *ncols = nz;
1338: if (cols) *cols = idx;
1339: if (values) *values = v;
1340: PetscFunctionReturn(PETSC_SUCCESS);
1341: }
1343: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1344: {
1345: PetscFunctionBegin;
1346: PetscCall(PetscFree2(*idx, *v));
1347: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1348: PetscFunctionReturn(PETSC_SUCCESS);
1349: }
1351: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1352: {
1353: Mat A;
1355: PetscFunctionBegin;
1356: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1357: PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1358: PetscCall(MatDestroy(&A));
1359: PetscFunctionReturn(PETSC_SUCCESS);
1360: }
1362: /*@C
1363: MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1365: Collective
1367: Input Parameters:
1368: + A - the `MATAIJ` matrix
1369: . p - number of rows in `S` and `T`
1370: . q - number of columns in `S` and `T`
1371: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1372: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1374: Output Parameter:
1375: . kaij - the new `MATKAIJ` matrix
1377: Level: advanced
1379: Notes:
1380: The created matrix is of the following form\:
1381: .vb
1382: [I \otimes S + A \otimes T]
1383: .ve
1384: where
1385: .vb
1386: S is a dense (p \times q) matrix
1387: T is a dense (p \times q) matrix
1388: A is a `MATAIJ` (n \times n) matrix
1389: I is the identity matrix
1390: .ve
1391: The resulting matrix is (np \times nq)
1393: `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1394: column-major format.
1396: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1398: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1400: Developer Notes:
1401: In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1402: of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1403: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1404: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1406: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1407: @*/
1408: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1409: {
1410: PetscFunctionBegin;
1411: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1412: PetscCall(MatSetType(*kaij, MATKAIJ));
1413: PetscCall(MatKAIJSetAIJ(*kaij, A));
1414: PetscCall(MatKAIJSetS(*kaij, p, q, S));
1415: PetscCall(MatKAIJSetT(*kaij, p, q, T));
1416: PetscCall(MatSetUp(*kaij));
1417: PetscFunctionReturn(PETSC_SUCCESS);
1418: }
1420: /*MC
1421: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1422: [I \otimes S + A \otimes T],
1423: where
1424: .vb
1425: S is a dense (p \times q) matrix,
1426: T is a dense (p \times q) matrix,
1427: A is an AIJ (n \times n) matrix,
1428: and I is the identity matrix.
1429: .ve
1430: The resulting matrix is (np \times nq).
1432: S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1434: Level: advanced
1436: Note:
1437: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1438: where x and b are column vectors containing the row-major representations of X and B.
1440: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1441: M*/
1443: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1444: {
1445: Mat_MPIKAIJ *b;
1446: PetscMPIInt size;
1448: PetscFunctionBegin;
1449: PetscCall(PetscNew(&b));
1450: A->data = (void *)b;
1452: PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1454: b->w = NULL;
1455: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1456: if (size == 1) {
1457: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1458: A->ops->destroy = MatDestroy_SeqKAIJ;
1459: A->ops->mult = MatMult_SeqKAIJ;
1460: A->ops->multadd = MatMultAdd_SeqKAIJ;
1461: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1462: A->ops->getrow = MatGetRow_SeqKAIJ;
1463: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1464: A->ops->sor = MatSOR_SeqKAIJ;
1465: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1466: } else {
1467: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1468: A->ops->destroy = MatDestroy_MPIKAIJ;
1469: A->ops->mult = MatMult_MPIKAIJ;
1470: A->ops->multadd = MatMultAdd_MPIKAIJ;
1471: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1472: A->ops->getrow = MatGetRow_MPIKAIJ;
1473: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1474: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1475: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1476: }
1477: A->ops->setup = MatSetUp_KAIJ;
1478: A->ops->view = MatView_KAIJ;
1479: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1480: PetscFunctionReturn(PETSC_SUCCESS);
1481: }