Actual source code: mpisbaij.c
2: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: #if defined(PETSC_HAVE_ELEMENTAL)
8: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
9: #endif
10: #if defined(PETSC_HAVE_SCALAPACK)
11: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
12: #endif
14: /* This could be moved to matimpl.h */
15: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16: {
17: Mat preallocator;
18: PetscInt r, rstart, rend;
19: PetscInt bs, i, m, n, M, N;
20: PetscBool cong = PETSC_TRUE;
24: for (i = 0; i < nm; i++) {
26: PetscLayoutCompare(B->rmap, X[i]->rmap, &cong);
28: }
30: MatGetBlockSize(B, &bs);
31: MatGetSize(B, &M, &N);
32: MatGetLocalSize(B, &m, &n);
33: MatCreate(PetscObjectComm((PetscObject)B), &preallocator);
34: MatSetType(preallocator, MATPREALLOCATOR);
35: MatSetBlockSize(preallocator, bs);
36: MatSetSizes(preallocator, m, n, M, N);
37: MatSetUp(preallocator);
38: MatGetOwnershipRange(preallocator, &rstart, &rend);
39: for (r = rstart; r < rend; ++r) {
40: PetscInt ncols;
41: const PetscInt *row;
42: const PetscScalar *vals;
44: for (i = 0; i < nm; i++) {
45: MatGetRow(X[i], r, &ncols, &row, &vals);
46: MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES);
47: if (symm && symm[i]) MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES);
48: MatRestoreRow(X[i], r, &ncols, &row, &vals);
49: }
50: }
51: MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY);
52: MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY);
53: MatPreallocatorPreallocate(preallocator, fill, B);
54: MatDestroy(&preallocator);
55: return 0;
56: }
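/*
   A minimal usage sketch for the helper above (names and sizes are illustrative;
   this mirrors how MatConvert_MPISBAIJ_Basic() below drives it): B must already
   have its communicator, sizes, and type set, and its nonzero pattern is taken
   from the union of the patterns of the matrices in X[]. With fill == PETSC_TRUE,
   explicit zeros are also inserted at every preallocated location.

     Mat       B, X[1];                  // X[0] assumed assembled
     PetscBool symm[1] = {PETSC_TRUE};   // X[0] stored symmetric: mirror its rows
     MatCreate(PetscObjectComm((PetscObject)X[0]), &B);
     MatSetSizes(B, m, n, M, N);
     MatSetType(B, MATMPIAIJ);
     MatPreallocateWithMats_Private(B, 1, X, symm, PETSC_TRUE);
*/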
58: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
59: {
60: Mat B;
61: PetscInt r;
63: if (reuse != MAT_REUSE_MATRIX) {
64: PetscBool symm = PETSC_TRUE, isdense;
65: PetscInt bs;
67: MatCreate(PetscObjectComm((PetscObject)A), &B);
68: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
69: MatSetType(B, newtype);
70: MatGetBlockSize(A, &bs);
71: MatSetBlockSize(B, bs);
72: PetscLayoutSetUp(B->rmap);
73: PetscLayoutSetUp(B->cmap);
74: PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "");
75: if (!isdense) {
76: MatGetRowUpperTriangular(A);
77: MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE);
78: MatRestoreRowUpperTriangular(A);
79: } else {
80: MatSetUp(B);
81: }
82: } else {
83: B = *newmat;
84: MatZeroEntries(B);
85: }
87: MatGetRowUpperTriangular(A);
88: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
89: PetscInt ncols;
90: const PetscInt *row;
91: const PetscScalar *vals;
93: MatGetRow(A, r, &ncols, &row, &vals);
94: MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES);
95: #if defined(PETSC_USE_COMPLEX)
96: if (A->hermitian == PETSC_BOOL3_TRUE) {
97: PetscInt i;
98: for (i = 0; i < ncols; i++) MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES);
99: } else {
100: MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES);
101: }
102: #else
103: MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES);
104: #endif
105: MatRestoreRow(A, r, &ncols, &row, &vals);
106: }
107: MatRestoreRowUpperTriangular(A);
108: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
109: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
111: if (reuse == MAT_INPLACE_MATRIX) {
112: MatHeaderReplace(A, &B);
113: } else {
114: *newmat = B;
115: }
116: return 0;
117: }
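/*
   Illustrative use of the basic converter above through the public API: the
   symmetric-storage matrix is expanded into full (both-triangle) storage in
   the target format.

     Mat A, B;                                          // A assumed MATMPISBAIJ, assembled
     MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B);  // B holds both triangles explicitly
     ...
     MatDestroy(&B);

   With MAT_INPLACE_MATRIX the converted matrix replaces A itself via MatHeaderReplace().
*/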
119: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
120: {
121: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
123: MatStoreValues(aij->A);
124: MatStoreValues(aij->B);
125: return 0;
126: }
128: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
129: {
130: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
132: MatRetrieveValues(aij->A);
133: MatRetrieveValues(aij->B);
134: return 0;
135: }
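/*
   Sketch of the store/retrieve cycle the two routines above support; the
   nonzero pattern must be frozen first or MatStoreValues() errors out.

     MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
     MatStoreValues(mat);     // saves the numerical values of both the A and B parts
     ...                      // overwrite values, e.g. experiment with a shifted matrix
     MatRetrieveValues(mat);  // restores the saved values
*/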
137: #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
138: { \
139: brow = row / bs; \
140: rp = aj + ai[brow]; \
141: ap = aa + bs2 * ai[brow]; \
142: rmax = aimax[brow]; \
143: nrow = ailen[brow]; \
144: bcol = col / bs; \
145: ridx = row % bs; \
146: cidx = col % bs; \
147: low = 0; \
148: high = nrow; \
149: while (high - low > 3) { \
150: t = (low + high) / 2; \
151: if (rp[t] > bcol) high = t; \
152: else low = t; \
153: } \
154: for (_i = low; _i < high; _i++) { \
155: if (rp[_i] > bcol) break; \
156: if (rp[_i] == bcol) { \
157: bap = ap + bs2 * _i + bs * cidx + ridx; \
158: if (addv == ADD_VALUES) *bap += value; \
159: else *bap = value; \
160: goto a_noinsert; \
161: } \
162: } \
163: if (a->nonew == 1) goto a_noinsert; \
165: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
166: N = nrow++ - 1; \
167: /* shift up all the later entries in this row */ \
168: PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1); \
169: PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1)); \
170: PetscArrayzero(ap + bs2 * _i, bs2); \
171: rp[_i] = bcol; \
172: ap[bs2 * _i + bs * cidx + ridx] = value; \
173: A->nonzerostate++; \
174: a_noinsert:; \
175: ailen[brow] = nrow; \
176: }
178: #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
179: { \
180: brow = row / bs; \
181: rp = bj + bi[brow]; \
182: ap = ba + bs2 * bi[brow]; \
183: rmax = bimax[brow]; \
184: nrow = bilen[brow]; \
185: bcol = col / bs; \
186: ridx = row % bs; \
187: cidx = col % bs; \
188: low = 0; \
189: high = nrow; \
190: while (high - low > 3) { \
191: t = (low + high) / 2; \
192: if (rp[t] > bcol) high = t; \
193: else low = t; \
194: } \
195: for (_i = low; _i < high; _i++) { \
196: if (rp[_i] > bcol) break; \
197: if (rp[_i] == bcol) { \
198: bap = ap + bs2 * _i + bs * cidx + ridx; \
199: if (addv == ADD_VALUES) *bap += value; \
200: else *bap = value; \
201: goto b_noinsert; \
202: } \
203: } \
204: if (b->nonew == 1) goto b_noinsert; \
206: MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
207: N = nrow++ - 1; \
208: /* shift up all the later entries in this row */ \
209: PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1); \
210: PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1)); \
211: PetscArrayzero(ap + bs2 * _i, bs2); \
212: rp[_i] = bcol; \
213: ap[bs2 * _i + bs * cidx + ridx] = value; \
214: B->nonzerostate++; \
215: b_noinsert:; \
216: bilen[brow] = nrow; \
217: }
219: /* Only add/insert a(i,j) with i<=j (blocks).
220:    Any a(i,j) with i>j that is input by the user is ignored or generates an error
221: */
222: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
223: {
224: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
225: MatScalar value;
226: PetscBool roworiented = baij->roworiented;
227: PetscInt i, j, row, col;
228: PetscInt rstart_orig = mat->rmap->rstart;
229: PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
230: PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
232:   /* Some variables required by the macros */
233: Mat A = baij->A;
234: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data;
235: PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
236: MatScalar *aa = a->a;
238: Mat B = baij->B;
239: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
240: PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
241: MatScalar *ba = b->a;
243: PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
244: PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
245: MatScalar *ap, *bap;
247: /* for stash */
248: PetscInt n_loc, *in_loc = NULL;
249: MatScalar *v_loc = NULL;
251: if (!baij->donotstash) {
252: if (n > baij->n_loc) {
253: PetscFree(baij->in_loc);
254: PetscFree(baij->v_loc);
255: PetscMalloc1(n, &baij->in_loc);
256: PetscMalloc1(n, &baij->v_loc);
258: baij->n_loc = n;
259: }
260: in_loc = baij->in_loc;
261: v_loc = baij->v_loc;
262: }
264: for (i = 0; i < m; i++) {
265: if (im[i] < 0) continue;
267: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
268: row = im[i] - rstart_orig; /* local row index */
269: for (j = 0; j < n; j++) {
270: if (im[i] / bs > in[j] / bs) {
271: if (a->ignore_ltriangular) {
272: continue; /* ignore lower triangular blocks */
273: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
274: }
275: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
276: col = in[j] - cstart_orig; /* local col index */
277: brow = row / bs;
278: bcol = col / bs;
279: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
280: if (roworiented) value = v[i * n + j];
281: else value = v[i + j * m];
282: MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
283: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
284: } else if (in[j] < 0) {
285: continue;
286: } else {
288: /* off-diag entry (B) */
289: if (mat->was_assembled) {
290: if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
291: #if defined(PETSC_USE_CTABLE)
292: PetscTableFind(baij->colmap, in[j] / bs + 1, &col);
293: col = col - 1;
294: #else
295: col = baij->colmap[in[j] / bs] - 1;
296: #endif
297: if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
298: MatDisAssemble_MPISBAIJ(mat);
299: col = in[j];
300:             /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
301: B = baij->B;
302: b = (Mat_SeqBAIJ *)(B)->data;
303: bimax = b->imax;
304: bi = b->i;
305: bilen = b->ilen;
306: bj = b->j;
307: ba = b->a;
308: } else col += in[j] % bs;
309: } else col = in[j];
310: if (roworiented) value = v[i * n + j];
311: else value = v[i + j * m];
312: MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
313: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
314: }
315: }
316: } else { /* off processor entry */
318: if (!baij->donotstash) {
319: mat->assembled = PETSC_FALSE;
320: n_loc = 0;
321: for (j = 0; j < n; j++) {
322: if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
323: in_loc[n_loc] = in[j];
324: if (roworiented) {
325: v_loc[n_loc] = v[i * n + j];
326: } else {
327: v_loc[n_loc] = v[j * m + i];
328: }
329: n_loc++;
330: }
331: MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE);
332: }
333: }
334: }
335: return 0;
336: }
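/*
   Caller-side view of the upper-triangular rule enforced above (indices are
   illustrative): only entries whose block row is <= the block column are kept;
   lower-triangular input either triggers the SETERRQ or, with the option below,
   is silently dropped.

     PetscInt    i = 3, j = 7;   // global indices with i <= j
     PetscScalar v = 4.2;
     MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);
     MatSetValues(mat, 1, &i, 1, &j, &v, INSERT_VALUES);  // stored
     MatSetValues(mat, 1, &j, 1, &i, &v, INSERT_VALUES);  // ignored: lower triangle
     MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
*/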
338: static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
339: {
340: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
341: PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
342: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
343: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
344: PetscBool roworiented = a->roworiented;
345: const PetscScalar *value = v;
346: MatScalar *ap, *aa = a->a, *bap;
348: if (col < row) {
349: if (a->ignore_ltriangular) return 0; /* ignore lower triangular block */
350: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
351: }
352: rp = aj + ai[row];
353: ap = aa + bs2 * ai[row];
354: rmax = imax[row];
355: nrow = ailen[row];
356: value = v;
357: low = 0;
358: high = nrow;
360: while (high - low > 7) {
361: t = (low + high) / 2;
362: if (rp[t] > col) high = t;
363: else low = t;
364: }
365: for (i = low; i < high; i++) {
366: if (rp[i] > col) break;
367: if (rp[i] == col) {
368: bap = ap + bs2 * i;
369: if (roworiented) {
370: if (is == ADD_VALUES) {
371: for (ii = 0; ii < bs; ii++) {
372: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
373: }
374: } else {
375: for (ii = 0; ii < bs; ii++) {
376: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
377: }
378: }
379: } else {
380: if (is == ADD_VALUES) {
381: for (ii = 0; ii < bs; ii++) {
382: for (jj = 0; jj < bs; jj++) *bap++ += *value++;
383: }
384: } else {
385: for (ii = 0; ii < bs; ii++) {
386: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
387: }
388: }
389: }
390: goto noinsert2;
391: }
392: }
393: if (nonew == 1) goto noinsert2;
395: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
396: N = nrow++ - 1;
397: high++;
398: /* shift up all the later entries in this row */
399: PetscArraymove(rp + i + 1, rp + i, N - i + 1);
400: PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1));
401: rp[i] = col;
402: bap = ap + bs2 * i;
403: if (roworiented) {
404: for (ii = 0; ii < bs; ii++) {
405: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
406: }
407: } else {
408: for (ii = 0; ii < bs; ii++) {
409: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
410: }
411: }
412: noinsert2:;
413: ailen[row] = nrow;
414: return 0;
415: }
417: /*
418: This routine is exactly duplicated in mpibaij.c
419: */
420: static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
421: {
422: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
423: PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
424: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
425: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
426: PetscBool roworiented = a->roworiented;
427: const PetscScalar *value = v;
428: MatScalar *ap, *aa = a->a, *bap;
430: rp = aj + ai[row];
431: ap = aa + bs2 * ai[row];
432: rmax = imax[row];
433: nrow = ailen[row];
434: low = 0;
435: high = nrow;
436: value = v;
437: while (high - low > 7) {
438: t = (low + high) / 2;
439: if (rp[t] > col) high = t;
440: else low = t;
441: }
442: for (i = low; i < high; i++) {
443: if (rp[i] > col) break;
444: if (rp[i] == col) {
445: bap = ap + bs2 * i;
446: if (roworiented) {
447: if (is == ADD_VALUES) {
448: for (ii = 0; ii < bs; ii++) {
449: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
450: }
451: } else {
452: for (ii = 0; ii < bs; ii++) {
453: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
454: }
455: }
456: } else {
457: if (is == ADD_VALUES) {
458: for (ii = 0; ii < bs; ii++, value += bs) {
459: for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
460: bap += bs;
461: }
462: } else {
463: for (ii = 0; ii < bs; ii++, value += bs) {
464: for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
465: bap += bs;
466: }
467: }
468: }
469: goto noinsert2;
470: }
471: }
472: if (nonew == 1) goto noinsert2;
474: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
475: N = nrow++ - 1;
476: high++;
477: /* shift up all the later entries in this row */
478: PetscArraymove(rp + i + 1, rp + i, N - i + 1);
479: PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1));
480: rp[i] = col;
481: bap = ap + bs2 * i;
482: if (roworiented) {
483: for (ii = 0; ii < bs; ii++) {
484: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
485: }
486: } else {
487: for (ii = 0; ii < bs; ii++) {
488: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
489: }
490: }
491: noinsert2:;
492: ailen[row] = nrow;
493: return 0;
494: }
496: /*
497: This routine could be optimized by removing the need for the block copy below and passing stride information
498: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
499: */
500: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
501: {
502: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
503: const MatScalar *value;
504: MatScalar *barray = baij->barray;
505: PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
506: PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
507: PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
508: PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
510: if (!barray) {
511: PetscMalloc1(bs2, &barray);
512: baij->barray = barray;
513: }
515: if (roworiented) {
516: stepval = (n - 1) * bs;
517: } else {
518: stepval = (m - 1) * bs;
519: }
520: for (i = 0; i < m; i++) {
521: if (im[i] < 0) continue;
523: if (im[i] >= rstart && im[i] < rend) {
524: row = im[i] - rstart;
525: for (j = 0; j < n; j++) {
526: if (im[i] > in[j]) {
527: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
528: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
529: }
530:         /* If only one block column (row-oriented) or one block row (column-oriented) is input, a copy is not required */
531: if ((roworiented) && (n == 1)) {
532: barray = (MatScalar *)v + i * bs2;
533: } else if ((!roworiented) && (m == 1)) {
534: barray = (MatScalar *)v + j * bs2;
535: } else { /* Here a copy is required */
536: if (roworiented) {
537: value = v + i * (stepval + bs) * bs + j * bs;
538: } else {
539: value = v + j * (stepval + bs) * bs + i * bs;
540: }
541: for (ii = 0; ii < bs; ii++, value += stepval) {
542: for (jj = 0; jj < bs; jj++) *barray++ = *value++;
543: }
544: barray -= bs2;
545: }
547: if (in[j] >= cstart && in[j] < cend) {
548: col = in[j] - cstart;
549: MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]);
550: } else if (in[j] < 0) {
551: continue;
552: } else {
554: if (mat->was_assembled) {
555: if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
557: #if defined(PETSC_USE_DEBUG)
558: #if defined(PETSC_USE_CTABLE)
559: {
560: PetscInt data;
561: PetscTableFind(baij->colmap, in[j] + 1, &data);
563: }
564: #else
566: #endif
567: #endif
568: #if defined(PETSC_USE_CTABLE)
569: PetscTableFind(baij->colmap, in[j] + 1, &col);
570: col = (col - 1) / bs;
571: #else
572: col = (baij->colmap[in[j]] - 1) / bs;
573: #endif
574: if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
575: MatDisAssemble_MPISBAIJ(mat);
576: col = in[j];
577: }
578: } else col = in[j];
579: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]);
580: }
581: }
582: } else {
584: if (!baij->donotstash) {
585: if (roworiented) {
586: MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
587: } else {
588: MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
589: }
590: }
591: }
592: }
593: return 0;
594: }
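/*
   Minimal sketch of the blocked path above (values are illustrative): the
   indices are BLOCK indices and v holds one dense bs x bs block, row-major
   when the matrix is row-oriented (the default).

     PetscInt    ib = 2, jb = 5;               // block row <= block column
     PetscScalar v[4] = {1.0, 2.0, 3.0, 4.0};  // bs == 2 assumed
     MatSetValuesBlocked(mat, 1, &ib, 1, &jb, v, INSERT_VALUES);
*/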
596: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
597: {
598: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
599: PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
600: PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
602: for (i = 0; i < m; i++) {
603: if (idxm[i] < 0) continue; /* negative row */
605: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
606: row = idxm[i] - bsrstart;
607: for (j = 0; j < n; j++) {
608: if (idxn[j] < 0) continue; /* negative column */
610: if (idxn[j] >= bscstart && idxn[j] < bscend) {
611: col = idxn[j] - bscstart;
612: MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j);
613: } else {
614: if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
615: #if defined(PETSC_USE_CTABLE)
616: PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data);
617: data--;
618: #else
619: data = baij->colmap[idxn[j] / bs] - 1;
620: #endif
621: if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
622: else {
623: col = data + idxn[j] % bs;
624: MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j);
625: }
626: }
627: }
628: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
629: }
630: return 0;
631: }
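/*
   As the SETERRQ above indicates, this type can only return values from rows
   owned by the calling process; a hedged sketch (row and col assumed to lie in
   the local ownership range of an assembled matrix):

     PetscScalar val;
     MatGetValues(mat, 1, &row, 1, &col, &val);
*/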
633: PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
634: {
635: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
636: PetscReal sum[2], *lnorm2;
638: if (baij->size == 1) {
639: MatNorm(baij->A, type, norm);
640: } else {
641: if (type == NORM_FROBENIUS) {
642: PetscMalloc1(2, &lnorm2);
643: MatNorm(baij->A, type, lnorm2);
644: *lnorm2 = (*lnorm2) * (*lnorm2);
645:       lnorm2++; /* square of norm(A) */
646: MatNorm(baij->B, type, lnorm2);
647: *lnorm2 = (*lnorm2) * (*lnorm2);
648:       lnorm2--; /* square of norm(B) */
649: MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
650: *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
651: PetscFree(lnorm2);
652: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
653: Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
654: Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
655: PetscReal *rsum, *rsum2, vabs;
656: PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
657: PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
658: MatScalar *v;
660: PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2);
661: PetscArrayzero(rsum, mat->cmap->N);
662: /* Amat */
663: v = amat->a;
664: jj = amat->j;
665: for (brow = 0; brow < mbs; brow++) {
666: grow = bs * (rstart + brow);
667: nz = amat->i[brow + 1] - amat->i[brow];
668: for (bcol = 0; bcol < nz; bcol++) {
669: gcol = bs * (rstart + *jj);
670: jj++;
671: for (col = 0; col < bs; col++) {
672: for (row = 0; row < bs; row++) {
673: vabs = PetscAbsScalar(*v);
674: v++;
675: rsum[gcol + col] += vabs;
676: /* non-diagonal block */
677: if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
678: }
679: }
680: }
681: PetscLogFlops(nz * bs * bs);
682: }
683: /* Bmat */
684: v = bmat->a;
685: jj = bmat->j;
686: for (brow = 0; brow < mbs; brow++) {
687: grow = bs * (rstart + brow);
688: nz = bmat->i[brow + 1] - bmat->i[brow];
689: for (bcol = 0; bcol < nz; bcol++) {
690: gcol = bs * garray[*jj];
691: jj++;
692: for (col = 0; col < bs; col++) {
693: for (row = 0; row < bs; row++) {
694: vabs = PetscAbsScalar(*v);
695: v++;
696: rsum[gcol + col] += vabs;
697: rsum[grow + row] += vabs;
698: }
699: }
700: }
701: PetscLogFlops(nz * bs * bs);
702: }
703: MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
704: *norm = 0.0;
705: for (col = 0; col < mat->cmap->N; col++) {
706: if (rsum2[col] > *norm) *norm = rsum2[col];
707: }
708: PetscFree2(rsum, rsum2);
709: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
710: }
711: return 0;
712: }
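/*
   The factor of 2 in the Frobenius branch above comes from the storage scheme:
   the off-diagonal block B of each process appears in the full symmetric matrix
   both as B and as B^T, so its squared norm is counted twice, while the SBAIJ
   diagonal block already accounts for its own symmetric storage internally:

     ||M||_F^2 = sum over ranks ( ||A||_F^2 + 2 ||B||_F^2 )

   which is exactly *norm = PetscSqrtReal(sum[0] + 2 * sum[1]).
*/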
714: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
715: {
716: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
717: PetscInt nstash, reallocs;
719: if (baij->donotstash || mat->nooffprocentries) return 0;
721: MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
722: MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs);
723: MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
724:   PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
725:   MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs);
726: PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
727: return 0;
728: }
730: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
731: {
732: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
733: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
734: PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
735: PetscInt *row, *col;
736: PetscBool other_disassembled;
737: PetscMPIInt n;
738: PetscBool r1, r2, r3;
739: MatScalar *val;
741: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
742: if (!baij->donotstash && !mat->nooffprocentries) {
743: while (1) {
744: MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
745: if (!flg) break;
747: for (i = 0; i < n;) {
748: /* Now identify the consecutive vals belonging to the same row */
749: for (j = i, rstart = row[j]; j < n; j++) {
750: if (row[j] != rstart) break;
751: }
752: if (j < n) ncols = j - i;
753: else ncols = n - i;
754: /* Now assemble all these values with a single function call */
755: MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
756: i = j;
757: }
758: }
759: MatStashScatterEnd_Private(&mat->stash);
760:     /* Now process the block-stash. Since the values are stashed column-oriented,
761:        temporarily clear the row-oriented flags and restore the original flags
762:        after MatSetValuesBlocked() */
763: r1 = baij->roworiented;
764: r2 = a->roworiented;
765: r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
767: baij->roworiented = PETSC_FALSE;
768: a->roworiented = PETSC_FALSE;
770:     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
771: while (1) {
772: MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg);
773: if (!flg) break;
775: for (i = 0; i < n;) {
776: /* Now identify the consecutive vals belonging to the same row */
777: for (j = i, rstart = row[j]; j < n; j++) {
778: if (row[j] != rstart) break;
779: }
780: if (j < n) ncols = j - i;
781: else ncols = n - i;
782: MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode);
783: i = j;
784: }
785: }
786: MatStashScatterEnd_Private(&mat->bstash);
788: baij->roworiented = r1;
789: a->roworiented = r2;
791:     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
792: }
794: MatAssemblyBegin(baij->A, mode);
795: MatAssemblyEnd(baij->A, mode);
797: /* determine if any processor has disassembled, if so we must
798: also disassemble ourselves, in order that we may reassemble. */
799: /*
800: if nonzero structure of submatrix B cannot change then we know that
801: no processor disassembled thus we can skip this stuff
802: */
803: if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
804: MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat));
805: if (mat->was_assembled && !other_disassembled) MatDisAssemble_MPISBAIJ(mat);
806: }
808: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */ }
809: MatAssemblyBegin(baij->B, mode);
810: MatAssemblyEnd(baij->B, mode);
812: PetscFree2(baij->rowvalues, baij->rowindices);
814: baij->rowvalues = NULL;
816:   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
817: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
818: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
819: MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat));
820: }
821: return 0;
822: }
824: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
825: #include <petscdraw.h>
826: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
827: {
828: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
829: PetscInt bs = mat->rmap->bs;
830: PetscMPIInt rank = baij->rank;
831: PetscBool iascii, isdraw;
832: PetscViewer sviewer;
833: PetscViewerFormat format;
835: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
836: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
837: if (iascii) {
838: PetscViewerGetFormat(viewer, &format);
839: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
840: MatInfo info;
841: MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
842: MatGetInfo(mat, MAT_LOCAL, &info);
843: PetscViewerASCIIPushSynchronized(viewer);
844:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
845:                                          mat->rmap->bs, (double)info.memory);
846: MatGetInfo(baij->A, MAT_LOCAL, &info);
847: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
848: MatGetInfo(baij->B, MAT_LOCAL, &info);
849: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
850: PetscViewerFlush(viewer);
851: PetscViewerASCIIPopSynchronized(viewer);
852: PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n");
853: VecScatterView(baij->Mvctx, viewer);
854: return 0;
855: } else if (format == PETSC_VIEWER_ASCII_INFO) {
856: PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs);
857: return 0;
858: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
859: return 0;
860: }
861: }
863: if (isdraw) {
864: PetscDraw draw;
865: PetscBool isnull;
866: PetscViewerDrawGetDraw(viewer, 0, &draw);
867: PetscDrawIsNull(draw, &isnull);
868: if (isnull) return 0;
869: }
871: {
872: /* assemble the entire matrix onto first processor. */
873: Mat A;
874: Mat_SeqSBAIJ *Aloc;
875: Mat_SeqBAIJ *Bloc;
876: PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
877: MatScalar *a;
878: const char *matname;
880: /* Should this be the same type as mat? */
881: MatCreate(PetscObjectComm((PetscObject)mat), &A);
882: if (rank == 0) {
883: MatSetSizes(A, M, N, M, N);
884: } else {
885: MatSetSizes(A, 0, 0, M, N);
886: }
887: MatSetType(A, MATMPISBAIJ);
888: MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL);
889: MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
891: /* copy over the A part */
892: Aloc = (Mat_SeqSBAIJ *)baij->A->data;
893: ai = Aloc->i;
894: aj = Aloc->j;
895: a = Aloc->a;
896: PetscMalloc1(bs, &rvals);
898: for (i = 0; i < mbs; i++) {
899: rvals[0] = bs * (baij->rstartbs + i);
900: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
901: for (j = ai[i]; j < ai[i + 1]; j++) {
902: col = (baij->cstartbs + aj[j]) * bs;
903: for (k = 0; k < bs; k++) {
904: MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES);
905: col++;
906: a += bs;
907: }
908: }
909: }
910: /* copy over the B part */
911: Bloc = (Mat_SeqBAIJ *)baij->B->data;
912: ai = Bloc->i;
913: aj = Bloc->j;
914: a = Bloc->a;
915: for (i = 0; i < mbs; i++) {
916: rvals[0] = bs * (baij->rstartbs + i);
917: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
918: for (j = ai[i]; j < ai[i + 1]; j++) {
919: col = baij->garray[aj[j]] * bs;
920: for (k = 0; k < bs; k++) {
921: MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES);
922: col++;
923: a += bs;
924: }
925: }
926: }
927: PetscFree(rvals);
928: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
929: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
930: /*
931: Everyone has to call to draw the matrix since the graphics waits are
932: synchronized across all processors that share the PetscDraw object
933: */
934: PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
935: PetscObjectGetName((PetscObject)mat, &matname);
936: if (rank == 0) {
937: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname);
938: MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer);
939: }
940: PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
941: PetscViewerFlush(viewer);
942: MatDestroy(&A);
943: }
944: return 0;
945: }
947: /* Used for both MPIBAIJ and MPISBAIJ matrices */
948: #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
950: PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
951: {
952: PetscBool iascii, isdraw, issocket, isbinary;
954: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
955: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
956: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
957: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
958: if (iascii || isdraw || issocket) {
959: MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer);
960: } else if (isbinary) MatView_MPISBAIJ_Binary(mat, viewer);
961: return 0;
962: }
964: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
965: {
966: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
968: #if defined(PETSC_USE_LOG)
969: PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
970: #endif
971: MatStashDestroy_Private(&mat->stash);
972: MatStashDestroy_Private(&mat->bstash);
973: MatDestroy(&baij->A);
974: MatDestroy(&baij->B);
975: #if defined(PETSC_USE_CTABLE)
976: PetscTableDestroy(&baij->colmap);
977: #else
978: PetscFree(baij->colmap);
979: #endif
980: PetscFree(baij->garray);
981: VecDestroy(&baij->lvec);
982: VecScatterDestroy(&baij->Mvctx);
983: VecDestroy(&baij->slvec0);
984: VecDestroy(&baij->slvec0b);
985: VecDestroy(&baij->slvec1);
986: VecDestroy(&baij->slvec1a);
987: VecDestroy(&baij->slvec1b);
988: VecScatterDestroy(&baij->sMvctx);
989: PetscFree2(baij->rowvalues, baij->rowindices);
990: PetscFree(baij->barray);
991: PetscFree(baij->hd);
992: VecDestroy(&baij->diag);
993: VecDestroy(&baij->bb1);
994: VecDestroy(&baij->xx1);
995: #if defined(PETSC_USE_REAL_MAT_SINGLE)
996: PetscFree(baij->setvaluescopy);
997: #endif
998: PetscFree(baij->in_loc);
999: PetscFree(baij->v_loc);
1000: PetscFree(baij->rangebs);
1001: PetscFree(mat->data);
1003: PetscObjectChangeTypeName((PetscObject)mat, NULL);
1004: PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL);
1005: PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL);
1006: PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL);
1007: PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL);
1008: #if defined(PETSC_HAVE_ELEMENTAL)
1009: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL);
1010: #endif
1011: #if defined(PETSC_HAVE_SCALAPACK)
1012: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL);
1013: #endif
1014: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL);
1015: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL);
1016: return 0;
1017: }
1019: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1020: {
1021: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1022: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1023: PetscScalar *from;
1024: const PetscScalar *x;
1026: /* diagonal part */
1027: (*a->A->ops->mult)(a->A, xx, a->slvec1a);
1028: VecSet(a->slvec1b, 0.0);
1030: /* subdiagonal part */
1032: (*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b);
1034: /* copy x into the vec slvec0 */
1035: VecGetArray(a->slvec0, &from);
1036: VecGetArrayRead(xx, &x);
1038: PetscArraycpy(from, x, bs * mbs);
1039: VecRestoreArray(a->slvec0, &from);
1040: VecRestoreArrayRead(xx, &x);
1042: VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1043: VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1044:   /* superdiagonal part */
1045: (*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy);
1046: return 0;
1047: }
1049: PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1050: {
1051: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1052: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1053: PetscScalar *from;
1054: const PetscScalar *x;
1056: /* diagonal part */
1057: (*a->A->ops->mult)(a->A, xx, a->slvec1a);
1058: VecSet(a->slvec1b, 0.0);
1060: /* subdiagonal part */
1061: (*a->B->ops->multtranspose)(a->B, xx, a->slvec0b);
1063: /* copy x into the vec slvec0 */
1064: VecGetArray(a->slvec0, &from);
1065: VecGetArrayRead(xx, &x);
1067: PetscArraycpy(from, x, bs * mbs);
1068: VecRestoreArray(a->slvec0, &from);
1069: VecRestoreArrayRead(xx, &x);
1071: VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1072: VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1073:   /* superdiagonal part */
1074: (*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy);
1075: return 0;
1076: }
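/*
   How the two mat-vec kernels above compute y = (A + B + B^T) x with only one
   scatter: slvec0 packs [ x ; B^T x ] and slvec1 starts as [ A x ; 0 ]; the
   scatter-add through sMvctx simultaneously accumulates the remote B^T x
   contributions into slvec1a and delivers the needed off-process x values into
   slvec1b, after which y = B * slvec1b + slvec1a. Caller-side it is simply:

     Vec x, y;
     MatCreateVecs(mat, &x, &y);
     VecSet(x, 1.0);
     MatMult(mat, x, y);
*/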
1078: PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1079: {
1080: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1081: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1082: PetscScalar *from, zero = 0.0;
1083: const PetscScalar *x;
1085: /* diagonal part */
1086: (*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a);
1087: VecSet(a->slvec1b, zero);
1089: /* subdiagonal part */
1091: (*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b);
1093: /* copy x into the vec slvec0 */
1094: VecGetArray(a->slvec0, &from);
1095: VecGetArrayRead(xx, &x);
1096: PetscArraycpy(from, x, bs * mbs);
1097: VecRestoreArray(a->slvec0, &from);
1099: VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1100: VecRestoreArrayRead(xx, &x);
1101: VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1103:   /* superdiagonal part */
1104: (*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz);
1105: return 0;
1106: }
1108: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1109: {
1110: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1111: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1112: PetscScalar *from, zero = 0.0;
1113: const PetscScalar *x;
1115: /* diagonal part */
1116: (*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a);
1117: VecSet(a->slvec1b, zero);
1119: /* subdiagonal part */
1120: (*a->B->ops->multtranspose)(a->B, xx, a->slvec0b);
1122: /* copy x into the vec slvec0 */
1123: VecGetArray(a->slvec0, &from);
1124: VecGetArrayRead(xx, &x);
1125: PetscArraycpy(from, x, bs * mbs);
1126: VecRestoreArray(a->slvec0, &from);
1128: VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1129: VecRestoreArrayRead(xx, &x);
1130: VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD);
1132:   /* superdiagonal part */
1133: (*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz);
1134: return 0;
1135: }
1137: /*
1138: This only works correctly for square matrices where the subblock A->A is the
1139: diagonal block
1140: */
1141: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1142: {
1143: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1146: MatGetDiagonal(a->A, v);
1147: return 0;
1148: }
1150: PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1151: {
1152: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1154: MatScale(a->A, aa);
1155: MatScale(a->B, aa);
1156: return 0;
1157: }
1159: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1160: {
1161: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1162: PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1163: PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1164: PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1165: PetscInt *cmap, *idx_p, cstart = mat->rstartbs;
1168: mat->getrowactive = PETSC_TRUE;
1170: if (!mat->rowvalues && (idx || v)) {
1171: /*
1172: allocate enough space to hold information from the longest row.
1173: */
1174: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
1175: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
1176: PetscInt max = 1, mbs = mat->mbs, tmp;
1177: for (i = 0; i < mbs; i++) {
1178: tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1179: if (max < tmp) max = tmp;
1180: }
1181: PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices);
1182: }
1185: lrow = row - brstart; /* local row index */
1187: pvA = &vworkA;
1188: pcA = &cworkA;
1189: pvB = &vworkB;
1190: pcB = &cworkB;
1191: if (!v) {
1192: pvA = NULL;
1193: pvB = NULL;
1194: }
1195: if (!idx) {
1196: pcA = NULL;
1197: if (!v) pcB = NULL;
1198: }
1199: (*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA);
1200: (*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB);
1201: nztot = nzA + nzB;
1203: cmap = mat->garray;
1204: if (v || idx) {
1205: if (nztot) {
1206: /* Sort by increasing column numbers, assuming A and B already sorted */
1207: PetscInt imark = -1;
1208: if (v) {
1209: *v = v_p = mat->rowvalues;
1210: for (i = 0; i < nzB; i++) {
1211: if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1212: else break;
1213: }
1214: imark = i;
1215: for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1216: for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1217: }
1218: if (idx) {
1219: *idx = idx_p = mat->rowindices;
1220: if (imark > -1) {
1221: for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1222: } else {
1223: for (i = 0; i < nzB; i++) {
1224: if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1225: else break;
1226: }
1227: imark = i;
1228: }
1229: for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1230: for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1231: }
1232: } else {
1233: if (idx) *idx = NULL;
1234: if (v) *v = NULL;
1235: }
1236: }
1237: *nz = nztot;
1238: (*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA);
1239: (*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB);
1240: return 0;
1241: }
1243: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1244: {
1245: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1248: baij->getrowactive = PETSC_FALSE;
1249: return 0;
1250: }
1252: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1253: {
1254: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1255: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1257: aA->getrow_utriangular = PETSC_TRUE;
1258: return 0;
1259: }
1260: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1261: {
1262: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1263: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1265: aA->getrow_utriangular = PETSC_FALSE;
1266: return 0;
1267: }
1269: PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1270: {
1271: if (PetscDefined(USE_COMPLEX)) {
1272: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1274: MatConjugate(a->A);
1275: MatConjugate(a->B);
1276: }
1277: return 0;
1278: }
1280: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1281: {
1282: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1284: MatRealPart(a->A);
1285: MatRealPart(a->B);
1286: return 0;
1287: }
1289: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1290: {
1291: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1293: MatImaginaryPart(a->A);
1294: MatImaginaryPart(a->B);
1295: return 0;
1296: }
1298: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1299:    Input: isrow - distributed (parallel),
1300: iscol_local - locally owned (seq)
1301: */
1302: PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1303: {
1304: PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
1305: const PetscInt *ptr1, *ptr2;
1307: ISGetLocalSize(isrow, &sz1);
1308: ISGetLocalSize(iscol_local, &sz2);
1309: if (sz1 > sz2) {
1310: *flg = PETSC_FALSE;
1311: return 0;
1312: }
1314: ISGetIndices(isrow, &ptr1);
1315: ISGetIndices(iscol_local, &ptr2);
1317: PetscMalloc1(sz1, &a1);
1318: PetscMalloc1(sz2, &a2);
1319: PetscArraycpy(a1, ptr1, sz1);
1320: PetscArraycpy(a2, ptr2, sz2);
1321: PetscSortInt(sz1, a1);
1322: PetscSortInt(sz2, a2);
1324: nmatch = 0;
1325: k = 0;
1326: for (i = 0; i < sz1; i++) {
1327: for (j = k; j < sz2; j++) {
1328: if (a1[i] == a2[j]) {
1329: k = j;
1330: nmatch++;
1331: break;
1332: }
1333: }
1334: }
1335: ISRestoreIndices(isrow, &ptr1);
1336: ISRestoreIndices(iscol_local, &ptr2);
1337: PetscFree(a1);
1338: PetscFree(a2);
1339: if (nmatch < sz1) {
1340: *flg = PETSC_FALSE;
1341: } else {
1342: *flg = PETSC_TRUE;
1343: }
1344: return 0;
1345: }
1347: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1348: {
1349: IS iscol_local;
1350: PetscInt csize;
1351: PetscBool isequal;
1353: ISGetLocalSize(iscol, &csize);
1354: if (call == MAT_REUSE_MATRIX) {
1355: PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local);
1357: } else {
1358: PetscBool issorted;
1360: ISAllGather(iscol, &iscol_local);
1361: ISEqual_private(isrow, iscol_local, &isequal);
1362: ISSorted(iscol_local, &issorted);
1364: }
1366: /* now call MatCreateSubMatrix_MPIBAIJ() */
1367: MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat);
1368: if (call == MAT_INITIAL_MATRIX) {
1369: PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local);
1370: ISDestroy(&iscol_local);
1371: }
1372: return 0;
1373: }
1375: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1376: {
1377: Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1379: MatZeroEntries(l->A);
1380: MatZeroEntries(l->B);
1381: return 0;
1382: }
1384: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1385: {
1386: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
1387: Mat A = a->A, B = a->B;
1388: PetscLogDouble isend[5], irecv[5];
1390: info->block_size = (PetscReal)matin->rmap->bs;
1392: MatGetInfo(A, MAT_LOCAL, info);
1394: isend[0] = info->nz_used;
1395: isend[1] = info->nz_allocated;
1396: isend[2] = info->nz_unneeded;
1397: isend[3] = info->memory;
1398: isend[4] = info->mallocs;
1400: MatGetInfo(B, MAT_LOCAL, info);
1402: isend[0] += info->nz_used;
1403: isend[1] += info->nz_allocated;
1404: isend[2] += info->nz_unneeded;
1405: isend[3] += info->memory;
1406: isend[4] += info->mallocs;
1407: if (flag == MAT_LOCAL) {
1408: info->nz_used = isend[0];
1409: info->nz_allocated = isend[1];
1410: info->nz_unneeded = isend[2];
1411: info->memory = isend[3];
1412: info->mallocs = isend[4];
1413: } else if (flag == MAT_GLOBAL_MAX) {
1414: MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin));
1416: info->nz_used = irecv[0];
1417: info->nz_allocated = irecv[1];
1418: info->nz_unneeded = irecv[2];
1419: info->memory = irecv[3];
1420: info->mallocs = irecv[4];
1421: } else if (flag == MAT_GLOBAL_SUM) {
1422: MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin));
1424: info->nz_used = irecv[0];
1425: info->nz_allocated = irecv[1];
1426: info->nz_unneeded = irecv[2];
1427: info->memory = irecv[3];
1428: info->mallocs = irecv[4];
1429: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1430: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1431: info->fill_ratio_needed = 0;
1432: info->factor_mallocs = 0;
1433: return 0;
1434: }
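/*
   Usage sketch for the info routine above; MAT_GLOBAL_SUM reduces the five
   per-process counters with MPI_SUM exactly as coded:

     MatInfo info;
     MatGetInfo(mat, MAT_GLOBAL_SUM, &info);
     PetscPrintf(PETSC_COMM_WORLD, "nz used %g, nz allocated %g\n", info.nz_used, info.nz_allocated);
*/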
1436: PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1437: {
1438: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1439: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1441: switch (op) {
1442: case MAT_NEW_NONZERO_LOCATIONS:
1443: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1444: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1445: case MAT_KEEP_NONZERO_PATTERN:
1446: case MAT_SUBMAT_SINGLEIS:
1447: case MAT_NEW_NONZERO_LOCATION_ERR:
1448: MatCheckPreallocated(A, 1);
1449: MatSetOption(a->A, op, flg);
1450: MatSetOption(a->B, op, flg);
1451: break;
1452: case MAT_ROW_ORIENTED:
1453: MatCheckPreallocated(A, 1);
1454: a->roworiented = flg;
1456: MatSetOption(a->A, op, flg);
1457: MatSetOption(a->B, op, flg);
1458: break;
1459: case MAT_FORCE_DIAGONAL_ENTRIES:
1460: case MAT_SORTED_FULL:
1461: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1462: break;
1463: case MAT_IGNORE_OFF_PROC_ENTRIES:
1464: a->donotstash = flg;
1465: break;
1466: case MAT_USE_HASH_TABLE:
1467: a->ht_flag = flg;
1468: break;
1469: case MAT_HERMITIAN:
1470: MatCheckPreallocated(A, 1);
1471: MatSetOption(a->A, op, flg);
1472: #if defined(PETSC_USE_COMPLEX)
1473: if (flg) { /* need different mat-vec ops */
1474: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1475: A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian;
1476: A->ops->multtranspose = NULL;
1477: A->ops->multtransposeadd = NULL;
1478: A->symmetric = PETSC_BOOL3_FALSE;
1479: }
1480: #endif
1481: break;
1482: case MAT_SPD:
1483: case MAT_SYMMETRIC:
1484: MatCheckPreallocated(A, 1);
1485: MatSetOption(a->A, op, flg);
1486: #if defined(PETSC_USE_COMPLEX)
1487: if (flg) { /* restore to use default mat-vec ops */
1488: A->ops->mult = MatMult_MPISBAIJ;
1489: A->ops->multadd = MatMultAdd_MPISBAIJ;
1490: A->ops->multtranspose = MatMult_MPISBAIJ;
1491: A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1492: }
1493: #endif
1494: break;
1495: case MAT_STRUCTURALLY_SYMMETRIC:
1496: MatCheckPreallocated(A, 1);
1497: MatSetOption(a->A, op, flg);
1498: break;
1499: case MAT_SYMMETRY_ETERNAL:
1500: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1502: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1503: break;
1504: case MAT_SPD_ETERNAL:
1505: break;
1506: case MAT_IGNORE_LOWER_TRIANGULAR:
1507: aA->ignore_ltriangular = flg;
1508: break;
1509: case MAT_ERROR_LOWER_TRIANGULAR:
1510: aA->ignore_ltriangular = flg;
1511: break;
1512: case MAT_GETROW_UPPERTRIANGULAR:
1513: aA->getrow_utriangular = flg;
1514: break;
1515: default:
1516: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1517: }
1518: return 0;
1519: }
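/*
   Illustrative option calls: in complex builds, marking the matrix Hermitian
   swaps in the Hermitian kernels above and disables MatMultTranspose(), while
   marking it symmetric restores the default kernels.

     MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);  // use MatMult_MPISBAIJ_Hermitian
     MatSetOption(mat, MAT_SYMMETRIC, PETSC_TRUE);  // back to MatMult_MPISBAIJ
*/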
1521: PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1522: {
1523: if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *B);
1524: if (reuse == MAT_INITIAL_MATRIX) {
1525: MatDuplicate(A, MAT_COPY_VALUES, B);
1526: } else if (reuse == MAT_REUSE_MATRIX) {
1527: MatCopy(A, *B, SAME_NONZERO_PATTERN);
1528: }
1529: return 0;
1530: }
1532: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1533: {
1534: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1535: Mat a = baij->A, b = baij->B;
1536: PetscInt nv, m, n;
1537: PetscBool flg;
1539: if (ll != rr) {
1540: VecEqual(ll, rr, &flg);
1542: }
1543: if (!ll) return 0;
1545: MatGetLocalSize(mat, &m, &n);
1548: VecGetLocalSize(rr, &nv);
1551: VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD);
1553: /* left diagonalscale the off-diagonal part */
1554: PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1556: /* scale the diagonal part */
1557: PetscUseTypeMethod(a, diagonalscale, ll, rr);
1559: /* right diagonalscale the off-diagonal part */
1560: VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD);
1561: PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1562: return 0;
1563: }
1565: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1566: {
1567: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1569: MatSetUnfactored(a->A);
1570: return 0;
1571: }
1573: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1575: PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1576: {
1577: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1578: Mat a, b, c, d;
1579: PetscBool flg;
1581: a = matA->A;
1582: b = matA->B;
1583: c = matB->A;
1584: d = matB->B;
1586: MatEqual(a, c, &flg);
1587: if (flg) MatEqual(b, d, &flg);
1588: MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
1589: return 0;
1590: }
1592: PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1593: {
1594: PetscBool isbaij;
1596: PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "");
1598: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1599: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1600: MatGetRowUpperTriangular(A);
1601: MatCopy_Basic(A, B, str);
1602: MatRestoreRowUpperTriangular(A);
1603: } else {
1604: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1605: Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1607: MatCopy(a->A, b->A, str);
1608: MatCopy(a->B, b->B, str);
1609: }
1610: PetscObjectStateIncrease((PetscObject)B);
1611: return 0;
1612: }
1614: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1615: {
1616: MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL);
1617: return 0;
1618: }
1620: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1621: {
1622: Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1623: PetscBLASInt bnz, one = 1;
1624: Mat_SeqSBAIJ *xa, *ya;
1625: Mat_SeqBAIJ *xb, *yb;
1627: if (str == SAME_NONZERO_PATTERN) {
1628: PetscScalar alpha = a;
1629: xa = (Mat_SeqSBAIJ *)xx->A->data;
1630: ya = (Mat_SeqSBAIJ *)yy->A->data;
1631: PetscBLASIntCast(xa->nz, &bnz);
1632: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1633: xb = (Mat_SeqBAIJ *)xx->B->data;
1634: yb = (Mat_SeqBAIJ *)yy->B->data;
1635: PetscBLASIntCast(xb->nz, &bnz);
1636: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1637: PetscObjectStateIncrease((PetscObject)Y);
1638: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1639: MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE);
1640: MatAXPY_Basic(Y, a, X, str);
1641: MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE);
1642: } else {
1643: Mat B;
1644: PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1646: MatGetRowUpperTriangular(X);
1647: MatGetRowUpperTriangular(Y);
1648: PetscMalloc1(yy->A->rmap->N, &nnz_d);
1649: PetscMalloc1(yy->B->rmap->N, &nnz_o);
1650: MatCreate(PetscObjectComm((PetscObject)Y), &B);
1651: PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name);
1652: MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N);
1653: MatSetBlockSizesFromMats(B, Y, Y);
1654: MatSetType(B, MATMPISBAIJ);
1655: MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d);
1656: MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o);
1657: MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o);
1658: MatAXPY_BasicWithPreallocation(B, Y, a, X, str);
1659: MatHeaderMerge(Y, &B);
1660: PetscFree(nnz_d);
1661: PetscFree(nnz_o);
1662: MatRestoreRowUpperTriangular(X);
1663: MatRestoreRowUpperTriangular(Y);
1664: }
1665: return 0;
1666: }
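/*
   Sketch of the three structure cases handled above (alpha is illustrative):

     MatAXPY(Y, alpha, X, SAME_NONZERO_PATTERN);      // direct BLAS axpy on the stored arrays
     MatAXPY(Y, alpha, X, SUBSET_NONZERO_PATTERN);    // row-by-row via MatAXPY_Basic()
     MatAXPY(Y, alpha, X, DIFFERENT_NONZERO_PATTERN); // Y rebuilt with the union pattern
*/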
1668: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1669: {
1670: PetscInt i;
1671: PetscBool flg;
1673: MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B); /* B[] are sbaij matrices */
1674: for (i = 0; i < n; i++) {
1675: ISEqual(irow[i], icol[i], &flg);
1676: if (!flg) MatSeqSBAIJZeroOps_Private(*B[i]);
1677: }
1678: return 0;
1679: }
1681: PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1682: {
1683: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1684: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data;
1686: if (!Y->preallocated) {
1687: MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL);
1688: } else if (!aij->nz) {
1689: PetscInt nonew = aij->nonew;
1690: MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL);
1691: aij->nonew = nonew;
1692: }
1693: MatShift_Basic(Y, a);
1694: return 0;
1695: }
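/*
   A minimal call into the routine above; it lazily preallocates one diagonal
   block per block row when Y holds no nonzeros yet:

     MatShift(Y, 1.0);   // Y <- Y + 1.0 * I
*/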
1697: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1698: {
1699: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1702: MatMissingDiagonal(a->A, missing, d);
1703: if (d) {
1704: PetscInt rstart;
1705: MatGetOwnershipRange(A, &rstart, NULL);
1706: *d += rstart / A->rmap->bs;
1707: }
1708: return 0;
1709: }
1711: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1712: {
1713: *a = ((Mat_MPISBAIJ *)A->data)->A;
1714: return 0;
1715: }
1717: /* -------------------------------------------------------------------*/
1718: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1719: MatGetRow_MPISBAIJ,
1720: MatRestoreRow_MPISBAIJ,
1721: MatMult_MPISBAIJ,
1722: /* 4*/ MatMultAdd_MPISBAIJ,
1723: MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1724: MatMultAdd_MPISBAIJ,
1725: NULL,
1726: NULL,
1727: NULL,
1728: /* 10*/ NULL,
1729: NULL,
1730: NULL,
1731: MatSOR_MPISBAIJ,
1732: MatTranspose_MPISBAIJ,
1733: /* 15*/ MatGetInfo_MPISBAIJ,
1734: MatEqual_MPISBAIJ,
1735: MatGetDiagonal_MPISBAIJ,
1736: MatDiagonalScale_MPISBAIJ,
1737: MatNorm_MPISBAIJ,
1738: /* 20*/ MatAssemblyBegin_MPISBAIJ,
1739: MatAssemblyEnd_MPISBAIJ,
1740: MatSetOption_MPISBAIJ,
1741: MatZeroEntries_MPISBAIJ,
1742: /* 24*/ NULL,
1743: NULL,
1744: NULL,
1745: NULL,
1746: NULL,
1747: /* 29*/ MatSetUp_MPISBAIJ,
1748: NULL,
1749: NULL,
1750: MatGetDiagonalBlock_MPISBAIJ,
1751: NULL,
1752: /* 34*/ MatDuplicate_MPISBAIJ,
1753: NULL,
1754: NULL,
1755: NULL,
1756: NULL,
1757: /* 39*/ MatAXPY_MPISBAIJ,
1758: MatCreateSubMatrices_MPISBAIJ,
1759: MatIncreaseOverlap_MPISBAIJ,
1760: MatGetValues_MPISBAIJ,
1761: MatCopy_MPISBAIJ,
1762: /* 44*/ NULL,
1763: MatScale_MPISBAIJ,
1764: MatShift_MPISBAIJ,
1765: NULL,
1766: NULL,
1767: /* 49*/ NULL,
1768: NULL,
1769: NULL,
1770: NULL,
1771: NULL,
1772: /* 54*/ NULL,
1773: NULL,
1774: MatSetUnfactored_MPISBAIJ,
1775: NULL,
1776: MatSetValuesBlocked_MPISBAIJ,
1777: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1778: NULL,
1779: NULL,
1780: NULL,
1781: NULL,
1782: /* 64*/ NULL,
1783: NULL,
1784: NULL,
1785: NULL,
1786: NULL,
1787: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1788: NULL,
1789: MatConvert_MPISBAIJ_Basic,
1790: NULL,
1791: NULL,
1792: /* 74*/ NULL,
1793: NULL,
1794: NULL,
1795: NULL,
1796: NULL,
1797: /* 79*/ NULL,
1798: NULL,
1799: NULL,
1800: NULL,
1801: MatLoad_MPISBAIJ,
1802: /* 84*/ NULL,
1803: NULL,
1804: NULL,
1805: NULL,
1806: NULL,
1807: /* 89*/ NULL,
1808: NULL,
1809: NULL,
1810: NULL,
1811: NULL,
1812: /* 94*/ NULL,
1813: NULL,
1814: NULL,
1815: NULL,
1816: NULL,
1817: /* 99*/ NULL,
1818: NULL,
1819: NULL,
1820: MatConjugate_MPISBAIJ,
1821: NULL,
1822: /*104*/ NULL,
1823: MatRealPart_MPISBAIJ,
1824: MatImaginaryPart_MPISBAIJ,
1825: MatGetRowUpperTriangular_MPISBAIJ,
1826: MatRestoreRowUpperTriangular_MPISBAIJ,
1827: /*109*/ NULL,
1828: NULL,
1829: NULL,
1830: NULL,
1831: MatMissingDiagonal_MPISBAIJ,
1832: /*114*/ NULL,
1833: NULL,
1834: NULL,
1835: NULL,
1836: NULL,
1837: /*119*/ NULL,
1838: NULL,
1839: NULL,
1840: NULL,
1841: NULL,
1842: /*124*/ NULL,
1843: NULL,
1844: NULL,
1845: NULL,
1846: NULL,
1847: /*129*/ NULL,
1848: NULL,
1849: NULL,
1850: NULL,
1851: NULL,
1852: /*134*/ NULL,
1853: NULL,
1854: NULL,
1855: NULL,
1856: NULL,
1857: /*139*/ MatSetBlockSizes_Default,
1858: NULL,
1859: NULL,
1860: NULL,
1861: NULL,
1862: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1863: NULL,
1864: NULL,
1865: NULL,
1866: NULL,
1867: NULL,
1868: /*150*/ NULL};
1870: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1871: {
1872: Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1873: PetscInt i, mbs, Mbs;
1874: PetscMPIInt size;
1876: MatSetBlockSize(B, PetscAbs(bs));
1877: PetscLayoutSetUp(B->rmap);
1878: PetscLayoutSetUp(B->cmap);
1879: PetscLayoutGetBlockSize(B->rmap, &bs);
1883: mbs = B->rmap->n / bs;
1884: Mbs = B->rmap->N / bs;
1887: B->rmap->bs = bs;
1888: b->bs2 = bs * bs;
1889: b->mbs = mbs;
1890: b->Mbs = Mbs;
1891: b->nbs = B->cmap->n / bs;
1892: b->Nbs = B->cmap->N / bs;
1894: for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1895: b->rstartbs = B->rmap->rstart / bs;
1896: b->rendbs = B->rmap->rend / bs;
1898: b->cstartbs = B->cmap->rstart / bs;
1899: b->cendbs = B->cmap->rend / bs;
1901: #if defined(PETSC_USE_CTABLE)
1902: PetscTableDestroy(&b->colmap);
1903: #else
1904: PetscFree(b->colmap);
1905: #endif
1906: PetscFree(b->garray);
1907: VecDestroy(&b->lvec);
1908: VecScatterDestroy(&b->Mvctx);
1909: VecDestroy(&b->slvec0);
1910: VecDestroy(&b->slvec0b);
1911: VecDestroy(&b->slvec1);
1912: VecDestroy(&b->slvec1a);
1913: VecDestroy(&b->slvec1b);
1914: VecScatterDestroy(&b->sMvctx);
1916: /* Because b->B may have been resized, we simply destroy it and create a new one each time */
1917: MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);
1918: MatDestroy(&b->B);
1919: MatCreate(PETSC_COMM_SELF, &b->B);
1920: MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0);
1921: MatSetType(b->B, MATSEQBAIJ);
1923: if (!B->preallocated) {
1924: MatCreate(PETSC_COMM_SELF, &b->A);
1925: MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n);
1926: MatSetType(b->A, MATSEQSBAIJ);
1927: MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash);
1928: }
1930: MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz);
1931: MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz);
1933: B->preallocated = PETSC_TRUE;
1934: B->was_assembled = PETSC_FALSE;
1935: B->assembled = PETSC_FALSE;
1936: return 0;
1937: }
1939: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1940: {
1941: PetscInt m, rstart, cend;
1942: PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1943: const PetscInt *JJ = NULL;
1944: PetscScalar *values = NULL;
1945: PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1946: PetscBool nooffprocentries;
1949: PetscLayoutSetBlockSize(B->rmap, bs);
1950: PetscLayoutSetBlockSize(B->cmap, bs);
1951: PetscLayoutSetUp(B->rmap);
1952: PetscLayoutSetUp(B->cmap);
1953: PetscLayoutGetBlockSize(B->rmap, &bs);
1954: m = B->rmap->n / bs;
1955: rstart = B->rmap->rstart / bs;
1956: cend = B->cmap->rend / bs;
1959: PetscMalloc2(m, &d_nnz, m, &o_nnz);
1960: for (i = 0; i < m; i++) {
1961: nz = ii[i + 1] - ii[i];
1963: /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
1964: JJ = jj + ii[i];
1965: bd = 0;
1966: for (j = 0; j < nz; j++) {
1967: if (*JJ >= i + rstart) break;
1968: JJ++;
1969: bd++;
1970: }
1971: d = 0;
1972: for (; j < nz; j++) {
1973: if (*JJ++ >= cend) break;
1974: d++;
1975: }
1976: d_nnz[i] = d;
1977: o_nnz[i] = nz - d - bd;
1978: nz = nz - bd;
1979: nz_max = PetscMax(nz_max, nz);
1980: }
1981: MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz);
1982: MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);
1983: PetscFree2(d_nnz, o_nnz);
1985: values = (PetscScalar *)V;
1986: if (!values) PetscCalloc1(bs * bs * nz_max, &values);
1987: for (i = 0; i < m; i++) {
1988: PetscInt row = i + rstart;
1989: PetscInt ncols = ii[i + 1] - ii[i];
1990: const PetscInt *icols = jj + ii[i];
1991: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
1992: const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
1993: MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES);
1994: } else { /* block ordering does not match so we can only insert one block at a time. */
1995: PetscInt j;
1996: for (j = 0; j < ncols; j++) {
1997: const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
1998: MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES);
1999: }
2000: }
2001: }
2003: if (!V) PetscFree(values);
2004: nooffprocentries = B->nooffprocentries;
2005: B->nooffprocentries = PETSC_TRUE;
2006: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2007: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2008: B->nooffprocentries = nooffprocentries;
2010: MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2011: return 0;
2012: }
2014: /*MC
2015: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2016: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2017: the matrix is stored.
2019: For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2020: can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2022: Options Database Keys:
2023: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2025: Note:
2026: The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2027: diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
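  Example:
  A minimal sketch of creating and assembling a 1x1-block MATMPISBAIJ matrix; comm, n, i, j, and v are
  placeholders supplied by the caller, and error checking is omitted:
.vb
  Mat A;
  MatCreate(comm, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n);
  MatSetType(A, MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(A, 1, 3, NULL, 1, NULL);
  MatSetValue(A, i, j, v, INSERT_VALUES); /* set only entries with j >= i; the lower triangle is implied by symmetry */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve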
2029: Level: beginner
2031: .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2032: M*/
2034: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2035: {
2036: Mat_MPISBAIJ *b;
2037: PetscBool flg = PETSC_FALSE;
2039: PetscNew(&b);
2040: B->data = (void *)b;
2041: PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
2043: B->ops->destroy = MatDestroy_MPISBAIJ;
2044: B->ops->view = MatView_MPISBAIJ;
2045: B->assembled = PETSC_FALSE;
2046: B->insertmode = NOT_SET_VALUES;
2048: MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank);
2049: MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size);
2051: /* build local table of row and column ownerships */
2052: PetscMalloc1(b->size + 2, &b->rangebs);
2054: /* build cache for off array entries formed */
2055: MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash);
2057: b->donotstash = PETSC_FALSE;
2058: b->colmap = NULL;
2059: b->garray = NULL;
2060: b->roworiented = PETSC_TRUE;
2062: /* stuff used in block assembly */
2063: b->barray = NULL;
2065: /* stuff used for matrix vector multiply */
2066: b->lvec = NULL;
2067: b->Mvctx = NULL;
2068: b->slvec0 = NULL;
2069: b->slvec0b = NULL;
2070: b->slvec1 = NULL;
2071: b->slvec1a = NULL;
2072: b->slvec1b = NULL;
2073: b->sMvctx = NULL;
2075: /* stuff for MatGetRow() */
2076: b->rowindices = NULL;
2077: b->rowvalues = NULL;
2078: b->getrowactive = PETSC_FALSE;
2080: /* hash table stuff */
2081: b->ht = NULL;
2082: b->hd = NULL;
2083: b->ht_size = 0;
2084: b->ht_flag = PETSC_FALSE;
2085: b->ht_fact = 0;
2086: b->ht_total_ct = 0;
2087: b->ht_insert_ct = 0;
2089: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2090: b->ijonly = PETSC_FALSE;
2092: b->in_loc = NULL;
2093: b->v_loc = NULL;
2094: b->n_loc = 0;
2096: PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ);
2097: PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ);
2098: PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ);
2099: PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
2100: #if defined(PETSC_HAVE_ELEMENTAL)
2101: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental);
2102: #endif
2103: #if defined(PETSC_HAVE_SCALAPACK)
2104: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK);
2105: #endif
2106: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic);
2107: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic);
2109: B->symmetric = PETSC_BOOL3_TRUE;
2110: B->structurally_symmetric = PETSC_BOOL3_TRUE;
2111: B->symmetry_eternal = PETSC_TRUE;
2112: B->structural_symmetry_eternal = PETSC_TRUE;
2113: #if defined(PETSC_USE_COMPLEX)
2114: B->hermitian = PETSC_BOOL3_FALSE;
2115: #else
2116: B->hermitian = PETSC_BOOL3_TRUE;
2117: #endif
2119: PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ);
2120: PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2121: PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL);
2122: if (flg) {
2123: PetscReal fact = 1.39;
2124: MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE);
2125: PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL);
2126: if (fact <= 1.0) fact = 1.39;
2127: MatMPIBAIJSetHashTableFactor(B, fact);
2128: PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact);
2129: }
2130: PetscOptionsEnd();
2131: return 0;
2132: }
2134: /*MC
2135: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2137: This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2138: and `MATMPISBAIJ` otherwise.
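  Example:
  A minimal sketch; the concrete type is chosen from the communicator size, and n is a placeholder global dimension:
.vb
  Mat A;
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n);
  MatSetType(A, MATSBAIJ); /* becomes MATSEQSBAIJ on one process, MATMPISBAIJ otherwise */
  MatSetFromOptions(A);    /* -mat_type sbaij selects this type as well */
.ve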
2140: Options Database Key:
2141: . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2143: Level: beginner
2145: .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2146: M*/
2148: /*@C
2149: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2150: the user should preallocate the matrix storage by setting the parameters
2151: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2152: performance can be increased by more than a factor of 50.
2154: Collective
2156: Input Parameters:
2157: + B - the matrix
2158: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2159: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2160: . d_nz - number of block nonzeros per block row in diagonal portion of local
2161: submatrix (same for all local rows)
2162: . d_nnz - array containing the number of block nonzeros in the various block rows
2163: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2164: (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
2165: for the diagonal entry and set a value even if it is zero.
2166: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2167: submatrix (same for all local rows).
2168: - o_nnz - array containing the number of nonzeros in the various block rows of the
2169: off-diagonal portion of the local submatrix that is right of the diagonal
2170: (possibly different for each block row) or NULL.
2172: Options Database Keys:
2173: + -mat_no_unroll - uses code that does not unroll the loops in the
2174: block calculations (much slower)
2175: - -mat_block_size - size of the blocks to use
2177: Notes:
2179: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2180: then it must be used on all processors that share the object for that argument.
2182: If the *_nnz parameter is given then the *_nz parameter is ignored
2184: Storage Information:
2185: For a square global matrix we define each processor's diagonal portion
2186: to be its local rows and the corresponding columns (a square submatrix);
2187: each processor's off-diagonal portion encompasses the remainder of the
2188: local matrix (a rectangular submatrix).
2190: The user can specify preallocated storage for the diagonal part of
2191: the local submatrix with either d_nz or d_nnz (not both). Set
2192: d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2193: memory allocation. Likewise, specify preallocated storage for the
2194: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2196: You can call `MatGetInfo()` to get information on how effective the preallocation was;
2197: for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2198: You can also run with the option -info and look for messages with the string
2199: malloc in them to see if additional memory allocation was needed.
2201: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2202: the figure below we depict these three local rows and all columns (0-11).
2204: .vb
2205: 0 1 2 3 4 5 6 7 8 9 10 11
2206: --------------------------
2207: row 3 |. . . d d d o o o o o o
2208: row 4 |. . . d d d o o o o o o
2209: row 5 |. . . d d d o o o o o o
2210: --------------------------
2211: .ve
2213: Thus, any entries in the d locations are stored in the d (diagonal)
2214: submatrix, and any entries in the o locations are stored in the
2215: o (off-diagonal) submatrix. Note that the d matrix is stored in
2216: `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2218: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2219: plus the diagonal part of the d matrix,
2220: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2222: In general, for PDE problems in which most nonzeros are near the diagonal,
2223: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2224: or you will get TERRIBLE performance; see the users' manual chapter on
2225: matrices.
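  Example:
  For the three local rows depicted above with bs = 1, counting only the diagonal and upper triangular
  entries of the d block, the preallocation could look like the following sketch:
.vb
  PetscInt d_nnz[] = {3, 2, 1}; /* row 3 keeps columns 3-5, row 4 keeps 4-5, row 5 keeps 5 */
  PetscInt o_nnz[] = {6, 6, 6}; /* columns 6-11 lie to the right of the diagonal in every local row */
  MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve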
2227: Level: intermediate
2229: .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2230: @*/
2231: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2232: {
2236: PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2237: return 0;
2238: }
2240: /*@C
2241: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2242: (block compressed row). For good matrix assembly performance
2243: the user should preallocate the matrix storage by setting the parameters
2244: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2245: performance can be increased by more than a factor of 50.
2247: Collective
2249: Input Parameters:
2250: + comm - MPI communicator
2251: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2252: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2253: . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2254: This value should be the same as the local size used in creating the
2255: y vector for the matrix-vector product y = Ax.
2256: . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2257: This value should be the same as the local size used in creating the
2258: x vector for the matrix-vector product y = Ax.
2259: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2260: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2261: . d_nz - number of block nonzeros per block row in diagonal portion of local
2262: submatrix (same for all local rows)
2263: . d_nnz - array containing the number of block nonzeros in the various block rows
2264: in the upper triangular portion of the diagonal portion of the local submatrix
2265: (possibly different for each block row) or NULL.
2266: If you plan to factor the matrix you must leave room for the diagonal entry and
2267: set its value even if it is zero.
2268: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2269: submatrix (same for all local rows).
2270: - o_nnz - array containing the number of nonzeros in the various block rows of the
2271: off-diagonal portion of the local submatrix (possibly different for
2272: each block row) or NULL.
2274: Output Parameter:
2275: . A - the matrix
2277: Options Database Keys:
2278: + -mat_no_unroll - uses code that does not unroll the loops in the
2279: block calculations (much slower)
2280: . -mat_block_size - size of the blocks to use
2281: - -mat_mpi - use the parallel matrix data structures even on one processor
2282: (defaults to using `MATSEQSBAIJ` format on one processor)
2284: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2285: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2286: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2288: Notes:
2289: The number of rows and columns must be divisible by blocksize.
2290: This matrix type does not support complex Hermitian operations.
2292: The user MUST specify either the local or global matrix dimensions
2293: (possibly both).
2295: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2296: then it must be used on all processors that share the object for that argument.
2298: If the *_nnz parameter is given then the *_nz parameter is ignored
2300: Storage Information:
2301: For a square global matrix we define each processor's diagonal portion
2302: to be its local rows and the corresponding columns (a square submatrix);
2303: each processor's off-diagonal portion encompasses the remainder of the
2304: local matrix (a rectangular submatrix).
2306: The user can specify preallocated storage for the diagonal part of
2307: the local submatrix with either d_nz or d_nnz (not both). Set
2308: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2309: memory allocation. Likewise, specify preallocated storage for the
2310: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2312: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2313: the figure below we depict these three local rows and all columns (0-11).
2315: .vb
2316: 0 1 2 3 4 5 6 7 8 9 10 11
2317: --------------------------
2318: row 3 |. . . d d d o o o o o o
2319: row 4 |. . . d d d o o o o o o
2320: row 5 |. . . d d d o o o o o o
2321: --------------------------
2322: .ve
2324: Thus, any entries in the d locations are stored in the d (diagonal)
2325: submatrix, and any entries in the o locations are stored in the
2326: o (off-diagonal) submatrix. Note that the d matrix is stored in
2327: `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2329: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2330: plus the diagonal part of the d matrix,
2331: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2332: In general, for PDE problems in which most nonzeros are near the diagonal,
2333: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2334: or you will get TERRIBLE performance; see the users' manual chapter on
2335: matrices.
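  Example:
  A minimal sketch; m, n, M, N and the communicator are the caller's, and the nnz counts are illustrative:
.vb
  Mat A;
  MatCreateSBAIJ(PETSC_COMM_WORLD, 2, m, n, M, N, 3, NULL, 1, NULL, &A);
.ve
  As recommended above, the MatCreate(), MatSetType(), MatMPISBAIJSetPreallocation() sequence is usually
  preferable to calling this routine directly.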
2337: Level: intermediate
2339: .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2340: @*/
2342: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2343: {
2344: PetscMPIInt size;
2346: MatCreate(comm, A);
2347: MatSetSizes(*A, m, n, M, N);
2348: MPI_Comm_size(comm, &size);
2349: if (size > 1) {
2350: MatSetType(*A, MATMPISBAIJ);
2351: MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz);
2352: } else {
2353: MatSetType(*A, MATSEQSBAIJ);
2354: MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz);
2355: }
2356: return 0;
2357: }
2359: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2360: {
2361: Mat mat;
2362: Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2363: PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2364: PetscScalar *array;
2366: *newmat = NULL;
2368: MatCreate(PetscObjectComm((PetscObject)matin), &mat);
2369: MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N);
2370: MatSetType(mat, ((PetscObject)matin)->type_name);
2371: PetscLayoutReference(matin->rmap, &mat->rmap);
2372: PetscLayoutReference(matin->cmap, &mat->cmap);
2374: mat->factortype = matin->factortype;
2375: mat->preallocated = PETSC_TRUE;
2376: mat->assembled = PETSC_TRUE;
2377: mat->insertmode = NOT_SET_VALUES;
2379: a = (Mat_MPISBAIJ *)mat->data;
2380: a->bs2 = oldmat->bs2;
2381: a->mbs = oldmat->mbs;
2382: a->nbs = oldmat->nbs;
2383: a->Mbs = oldmat->Mbs;
2384: a->Nbs = oldmat->Nbs;
2386: a->size = oldmat->size;
2387: a->rank = oldmat->rank;
2388: a->donotstash = oldmat->donotstash;
2389: a->roworiented = oldmat->roworiented;
2390: a->rowindices = NULL;
2391: a->rowvalues = NULL;
2392: a->getrowactive = PETSC_FALSE;
2393: a->barray = NULL;
2394: a->rstartbs = oldmat->rstartbs;
2395: a->rendbs = oldmat->rendbs;
2396: a->cstartbs = oldmat->cstartbs;
2397: a->cendbs = oldmat->cendbs;
2399: /* hash table stuff */
2400: a->ht = NULL;
2401: a->hd = NULL;
2402: a->ht_size = 0;
2403: a->ht_flag = oldmat->ht_flag;
2404: a->ht_fact = oldmat->ht_fact;
2405: a->ht_total_ct = 0;
2406: a->ht_insert_ct = 0;
2408: PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2);
2409: if (oldmat->colmap) {
2410: #if defined(PETSC_USE_CTABLE)
2411: PetscTableCreateCopy(oldmat->colmap, &a->colmap);
2412: #else
2413: PetscMalloc1(a->Nbs, &a->colmap);
2414: PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs);
2415: #endif
2416: } else a->colmap = NULL;
2418: if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2419: PetscMalloc1(len, &a->garray);
2420: PetscArraycpy(a->garray, oldmat->garray, len);
2421: } else a->garray = NULL;
2423: MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash);
2424: VecDuplicate(oldmat->lvec, &a->lvec);
2425: VecScatterCopy(oldmat->Mvctx, &a->Mvctx);
2427: VecDuplicate(oldmat->slvec0, &a->slvec0);
2428: VecDuplicate(oldmat->slvec1, &a->slvec1);
2430: VecGetLocalSize(a->slvec1, &nt);
2431: VecGetArray(a->slvec1, &array);
2432: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a);
2433: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b);
2434: VecRestoreArray(a->slvec1, &array);
2435: VecGetArray(a->slvec0, &array);
2436: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b);
2437: VecRestoreArray(a->slvec0, &array);
2439: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2440: PetscObjectReference((PetscObject)oldmat->sMvctx);
2441: a->sMvctx = oldmat->sMvctx;
2443: MatDuplicate(oldmat->A, cpvalues, &a->A);
2444: MatDuplicate(oldmat->B, cpvalues, &a->B);
2445: PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist);
2446: *newmat = mat;
2447: return 0;
2448: }
2450: /* Used for both MPIBAIJ and MPISBAIJ matrices */
2451: #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2453: PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2454: {
2455: PetscBool isbinary;
2457: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
2458: if (!isbinary) SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2459: MatLoad_MPISBAIJ_Binary(mat, viewer);
2460: return 0;
2461: }
2463: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2464: {
2465: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2466: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data;
2467: PetscReal atmp;
2468: PetscReal *work, *svalues, *rvalues;
2469: PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2470: PetscMPIInt rank, size;
2471: PetscInt *rowners_bs, dest, count, source;
2472: PetscScalar *va;
2473: MatScalar *ba;
2474: MPI_Status stat;
2477: MatGetRowMaxAbs(a->A, v, NULL);
2478: VecGetArray(v, &va);
2480: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
2481: MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank);
2483: bs = A->rmap->bs;
2484: mbs = a->mbs;
2485: Mbs = a->Mbs;
2486: ba = b->a;
2487: bi = b->i;
2488: bj = b->j;
2490: /* find ownerships */
2491: rowners_bs = A->rmap->range;
2493: /* each proc creates an array to be distributed */
2494: PetscCalloc1(bs * Mbs, &work);
2496: /* row_max for B */
2497: if (rank != size - 1) {
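/* the last process has no columns to the right of its diagonal block, so its B part is empty and there are no higher-rank owners to send to */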
2498: for (i = 0; i < mbs; i++) {
2499: ncols = bi[1] - bi[0];
2500: bi++;
2501: brow = bs * i;
2502: for (j = 0; j < ncols; j++) {
2503: bcol = bs * (*bj);
2504: for (kcol = 0; kcol < bs; kcol++) {
2505: col = bcol + kcol; /* local col index */
2506: col += rowners_bs[rank + 1]; /* global col index */
2507: for (krow = 0; krow < bs; krow++) {
2508: atmp = PetscAbsScalar(*ba);
2509: ba++;
2510: row = brow + krow; /* local row index */
2511: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2512: if (work[col] < atmp) work[col] = atmp;
2513: }
2514: }
2515: bj++;
2516: }
2517: }
2519: /* send values to its owners */
2520: for (dest = rank + 1; dest < size; dest++) {
2521: svalues = work + rowners_bs[dest];
2522: count = rowners_bs[dest + 1] - rowners_bs[dest];
2523: MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A));
2524: }
2525: }
2527: /* receive values */
2528: if (rank) {
2529: rvalues = work;
2530: count = rowners_bs[rank + 1] - rowners_bs[rank];
2531: for (source = 0; source < rank; source++) {
2532: MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat);
2533: /* process values */
2534: for (i = 0; i < count; i++) {
2535: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2536: }
2537: }
2538: }
2540: VecRestoreArray(v, &va);
2541: PetscFree(work);
2542: return 0;
2543: }
2545: PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2546: {
2547: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
2548: PetscInt mbs = mat->mbs, bs = matin->rmap->bs;
2549: PetscScalar *x, *ptr, *from;
2550: Vec bb1;
2551: const PetscScalar *b;
2556: if (flag == SOR_APPLY_UPPER) {
2557: (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
2558: return 0;
2559: }
2561: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2562: if (flag & SOR_ZERO_INITIAL_GUESS) {
2563: (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx);
2564: its--;
2565: }
2567: VecDuplicate(bb, &bb1);
2568: while (its--) {
2569: /* lower triangular part: slvec0b = - B^T*xx */
2570: (*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b);
2572: /* copy xx into slvec0a */
2573: VecGetArray(mat->slvec0, &ptr);
2574: VecGetArray(xx, &x);
2575: PetscArraycpy(ptr, x, bs * mbs);
2576: VecRestoreArray(mat->slvec0, &ptr);
2578: VecScale(mat->slvec0, -1.0);
2580: /* copy bb into slvec1a */
2581: VecGetArray(mat->slvec1, &ptr);
2582: VecGetArrayRead(bb, &b);
2583: PetscArraycpy(ptr, b, bs * mbs);
2584: VecRestoreArray(mat->slvec1, &ptr);
2586: /* set slvec1b = 0 */
2587: VecSet(mat->slvec1b, 0.0);
2589: VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD);
2590: VecRestoreArray(xx, &x);
2591: VecRestoreArrayRead(bb, &b);
2592: VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD);
2594: /* upper triangular part: bb1 = bb1 - B*x */
2595: (*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1);
2597: /* local diagonal sweep */
2598: (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx);
2599: }
2600: VecDestroy(&bb1);
2601: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2602: (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
2603: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2604: (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
2605: } else if (flag & SOR_EISENSTAT) {
2606: Vec xx1;
2607: PetscBool hasop;
2608: const PetscScalar *diag;
2609: PetscScalar *sl, scale = (omega - 2.0) / omega;
2610: PetscInt i, n;
2612: if (!mat->xx1) {
2613: VecDuplicate(bb, &mat->xx1);
2614: VecDuplicate(bb, &mat->bb1);
2615: }
2616: xx1 = mat->xx1;
2617: bb1 = mat->bb1;
2619: (*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx);
2621: if (!mat->diag) {
2622: /* this is wrong for same matrix with new nonzero values */
2623: MatCreateVecs(matin, &mat->diag, NULL);
2624: MatGetDiagonal(matin, mat->diag);
2625: }
2626: MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop);
2628: if (hasop) {
2629: MatMultDiagonalBlock(matin, xx, bb1);
2630: VecAYPX(mat->slvec1a, scale, bb);
2631: } else {
2632: /*
2633: These two lines are replaced by code that may be a bit faster for a good compiler
2634: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
2635: VecAYPX(mat->slvec1a,scale,bb);
2636: */
2637: VecGetArray(mat->slvec1a, &sl);
2638: VecGetArrayRead(mat->diag, &diag);
2639: VecGetArrayRead(bb, &b);
2640: VecGetArray(xx, &x);
2641: VecGetLocalSize(xx, &n);
2642: if (omega == 1.0) {
2643: for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2644: PetscLogFlops(2.0 * n);
2645: } else {
2646: for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2647: PetscLogFlops(3.0 * n);
2648: }
2649: VecRestoreArray(mat->slvec1a, &sl);
2650: VecRestoreArrayRead(mat->diag, &diag);
2651: VecRestoreArrayRead(bb, &b);
2652: VecRestoreArray(xx, &x);
2653: }
2655: /* multiply off-diagonal portion of matrix */
2656: VecSet(mat->slvec1b, 0.0);
2657: (*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b);
2658: VecGetArray(mat->slvec0, &from);
2659: VecGetArray(xx, &x);
2660: PetscArraycpy(from, x, bs * mbs);
2661: VecRestoreArray(mat->slvec0, &from);
2662: VecRestoreArray(xx, &x);
2663: VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD);
2664: VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD);
2665: (*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a);
2667: /* local sweep */
2668: (*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1);
2669: VecAXPY(xx, 1.0, xx1);
2670: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2671: return 0;
2672: }
2674: /*@
2675: MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
2676: in standard CSR format.
2678: Collective
2680: Input Parameters:
2681: + comm - MPI communicator
2682: . bs - the block size, only a block size of 1 is supported
2683: . m - number of local rows (Cannot be `PETSC_DECIDE`)
2684: . n - number of local columns; this value should be the same as the local size used in creating the
2685: x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2686: calculated if N is given). For square matrices n is almost always m.
2687: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
2688: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2689: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2690: . j - column indices
2691: - a - matrix values
2693: Output Parameter:
2694: . mat - the matrix
2696: Level: intermediate
2698: Notes:
2699: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2700: thus you CANNOT change the matrix entries by changing the values of a[] after you have
2701: called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2703: The i and j indices are 0-based, and the entries of i are offsets into the local j array.
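  Example:
  A sketch for bs = 1 with two local rows whose global indices are rstart and rstart+1; comm and rstart
  are placeholders, and only the upper triangle is supplied:
.vb
  PetscInt    i[] = {0, 2, 3};                        /* offsets into j for each local row */
  PetscInt    j[] = {rstart, rstart + 1, rstart + 1}; /* sorted global column indices per row */
  PetscScalar a[] = {4.0, -1.0, 4.0};
  Mat         A;
  MatCreateMPISBAIJWithArrays(comm, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve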
2705: .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2706: `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2707: @*/
2708: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2709: {
2712: MatCreate(comm, mat);
2713: MatSetSizes(*mat, m, n, M, N);
2714: MatSetType(*mat, MATMPISBAIJ);
2715: MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a);
2716: return 0;
2717: }
2719: /*@C
2720: MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2722: Collective
2724: Input Parameters:
2725: + B - the matrix
2726: . bs - the block size
2727: . i - the indices into j for the start of each local row (starts with zero)
2728: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2729: - v - optional values in the matrix
2731: Level: advanced
2733: Notes:
2734: Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2735: and usually the numerical values as well
2737: Any entries below the diagonal are ignored
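  Example:
  A sketch, assuming B already has its communicator and sizes set and i, j, v are laid out in the CSR form described above:
.vb
  MatSetType(B, MATMPISBAIJ);
  MatMPISBAIJSetPreallocationCSR(B, bs, i, j, v);
.ve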
2739: .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2740: @*/
2741: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2742: {
2743: PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2744: return 0;
2745: }
2747: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2748: {
2749: PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
2750: PetscInt *indx;
2751: PetscScalar *values;
2753: MatGetSize(inmat, &m, &N);
2754: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2755: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2756: PetscInt *dnz, *onz, mbs, Nbs, nbs;
2757: PetscInt *bindx, rmax = a->rmax, j;
2758: PetscMPIInt rank, size;
2760: MatGetBlockSizes(inmat, &bs, &cbs);
2761: mbs = m / bs;
2762: Nbs = N / cbs;
2763: if (n == PETSC_DECIDE) PetscSplitOwnershipBlock(comm, cbs, &n, &N);
2764: nbs = n / cbs;
2766: PetscMalloc1(rmax, &bindx);
2767: MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2769: MPI_Comm_rank(comm, &rank);
2770: MPI_Comm_size(comm, &size);
2771: if (rank == size - 1) {
2772: /* Check sum(nbs) = Nbs */
2773: if (__end != Nbs) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2774: }
2776: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2777: MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE);
2778: for (i = 0; i < mbs; i++) {
2779: MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL); /* non-blocked nnz and indx */
2780: nnz = nnz / bs;
2781: for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2782: MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz);
2783: MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL);
2784: }
2785: MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE);
2786: PetscFree(bindx);
2788: MatCreate(comm, outmat);
2789: MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
2790: MatSetBlockSizes(*outmat, bs, cbs);
2791: MatSetType(*outmat, MATSBAIJ);
2792: MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz);
2793: MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz);
2794: MatPreallocateEnd(dnz, onz);
2795: }
2797: /* numeric phase */
2798: MatGetBlockSizes(inmat, &bs, &cbs);
2799: MatGetOwnershipRange(*outmat, &rstart, NULL);
2801: MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE);
2802: for (i = 0; i < m; i++) {
2803: MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values);
2804: Ii = i + rstart;
2805: MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES);
2806: MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values);
2807: }
2808: MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE);
2809: MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY);
2810: MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY);
2811: return 0;
2812: }