Actual source code: mpisell.c
1: #include <../src/mat/impls/aij/mpi/mpiaij.h>
2: #include <../src/mat/impls/sell/mpi/mpisell.h>
3: #include <petsc/private/vecimpl.h>
4: #include <petsc/private/isimpl.h>
5: #include <petscblaslapack.h>
6: #include <petscsf.h>
8: /*MC
9: MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.
11: This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
12: and `MATMPISELL` otherwise. As a result, for single process communicators,
13: `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
14: for communicators controlling multiple processes. It is recommended that you call both of
15: the above preallocation routines for simplicity.
17: Options Database Keys:
18: . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`
20: Level: beginner
22: .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL`
23: M*/
25: static PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is)
26: {
27: Mat_MPISELL *sell = (Mat_MPISELL *)Y->data;
29: PetscFunctionBegin;
30: if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
31: PetscCall(MatDiagonalSet(sell->A, D, is));
32: } else {
33: PetscCall(MatDiagonalSet_Default(Y, D, is));
34: }
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
38: /*
39: Local utility routine that creates a mapping from the global column
40: number to the local number in the off-diagonal part of the local
41: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
42: a slightly higher hash table cost; without it it is not scalable (each processor
43: has an order N integer array but is fast to access.
44: */
45: PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat)
46: {
47: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
48: PetscInt n = sell->B->cmap->n, i;
50: PetscFunctionBegin;
51: PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray");
52: #if defined(PETSC_USE_CTABLE)
53: PetscCall(PetscHMapICreateWithSize(n, &sell->colmap));
54: for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1));
55: #else
56: PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap));
57: for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1;
58: #endif
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: #define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \
63: { \
64: if (col <= lastcol1) low1 = 0; \
65: else high1 = nrow1; \
66: lastcol1 = col; \
67: while (high1 - low1 > 5) { \
68: t = (low1 + high1) / 2; \
69: if (cp1[sliceheight * t] > col) high1 = t; \
70: else low1 = t; \
71: } \
72: for (_i = low1; _i < high1; _i++) { \
73: if (cp1[sliceheight * _i] > col) break; \
74: if (cp1[sliceheight * _i] == col) { \
75: if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \
76: else vp1[sliceheight * _i] = value; \
77: inserted = PETSC_TRUE; \
78: goto a_noinsert; \
79: } \
80: } \
81: if (value == 0.0 && ignorezeroentries) { \
82: low1 = 0; \
83: high1 = nrow1; \
84: goto a_noinsert; \
85: } \
86: if (nonew == 1) { \
87: low1 = 0; \
88: high1 = nrow1; \
89: goto a_noinsert; \
90: } \
91: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
92: MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \
93: /* shift up all the later entries in this row */ \
94: for (ii = nrow1 - 1; ii >= _i; ii--) { \
95: cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \
96: vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \
97: } \
98: cp1[sliceheight * _i] = col; \
99: vp1[sliceheight * _i] = value; \
100: a->nz++; \
101: nrow1++; \
102: a_noinsert:; \
103: a->rlen[row] = nrow1; \
104: }
106: #define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \
107: { \
108: if (col <= lastcol2) low2 = 0; \
109: else high2 = nrow2; \
110: lastcol2 = col; \
111: while (high2 - low2 > 5) { \
112: t = (low2 + high2) / 2; \
113: if (cp2[sliceheight * t] > col) high2 = t; \
114: else low2 = t; \
115: } \
116: for (_i = low2; _i < high2; _i++) { \
117: if (cp2[sliceheight * _i] > col) break; \
118: if (cp2[sliceheight * _i] == col) { \
119: if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \
120: else vp2[sliceheight * _i] = value; \
121: inserted = PETSC_TRUE; \
122: goto b_noinsert; \
123: } \
124: } \
125: if (value == 0.0 && ignorezeroentries) { \
126: low2 = 0; \
127: high2 = nrow2; \
128: goto b_noinsert; \
129: } \
130: if (nonew == 1) { \
131: low2 = 0; \
132: high2 = nrow2; \
133: goto b_noinsert; \
134: } \
135: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
136: MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \
137: /* shift up all the later entries in this row */ \
138: for (ii = nrow2 - 1; ii >= _i; ii--) { \
139: cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \
140: vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \
141: } \
142: cp2[sliceheight * _i] = col; \
143: vp2[sliceheight * _i] = value; \
144: b->nz++; \
145: nrow2++; \
146: b_noinsert:; \
147: b->rlen[row] = nrow2; \
148: }
150: static PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
151: {
152: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
153: PetscScalar value;
154: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2;
155: PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
156: PetscBool roworiented = sell->roworiented;
158: /* Some Variables required in the macro */
159: Mat A = sell->A;
160: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
161: PetscBool ignorezeroentries = a->ignorezeroentries, found;
162: Mat B = sell->B;
163: Mat_SeqSELL *b = (Mat_SeqSELL *)B->data;
164: PetscInt *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight;
165: MatScalar *vp1, *vp2;
167: PetscFunctionBegin;
168: for (i = 0; i < m; i++) {
169: if (im[i] < 0) continue;
170: PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
171: if (im[i] >= rstart && im[i] < rend) {
172: row = im[i] - rstart;
173: lastcol1 = -1;
174: shift1 = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
175: cp1 = PetscSafePointerPlusOffset(a->colidx, shift1);
176: vp1 = PetscSafePointerPlusOffset(a->val, shift1);
177: nrow1 = a->rlen[row];
178: low1 = 0;
179: high1 = nrow1;
180: lastcol2 = -1;
181: shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
182: cp2 = PetscSafePointerPlusOffset(b->colidx, shift2);
183: vp2 = PetscSafePointerPlusOffset(b->val, shift2);
184: nrow2 = b->rlen[row];
185: low2 = 0;
186: high2 = nrow2;
188: for (j = 0; j < n; j++) {
189: if (roworiented) value = v[i * n + j];
190: else value = v[i + j * m];
191: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
192: if (in[j] >= cstart && in[j] < cend) {
193: col = in[j] - cstart;
194: MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */
195: #if defined(PETSC_HAVE_CUDA)
196: if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU;
197: #endif
198: } else if (in[j] < 0) {
199: continue;
200: } else {
201: PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
202: if (mat->was_assembled) {
203: if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
204: #if defined(PETSC_USE_CTABLE)
205: PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col));
206: col--;
207: #else
208: col = sell->colmap[in[j]] - 1;
209: #endif
210: if (col < 0 && !((Mat_SeqSELL *)sell->B->data)->nonew) {
211: PetscCall(MatDisAssemble_MPISELL(mat));
212: col = in[j];
213: /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */
214: B = sell->B;
215: b = (Mat_SeqSELL *)B->data;
216: shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
217: cp2 = b->colidx + shift2;
218: vp2 = b->val + shift2;
219: nrow2 = b->rlen[row];
220: low2 = 0;
221: high2 = nrow2;
222: found = PETSC_FALSE;
223: } else {
224: PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
225: }
226: } else col = in[j];
227: MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */
228: #if defined(PETSC_HAVE_CUDA)
229: if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU;
230: #endif
231: }
232: }
233: } else {
234: PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
235: if (!sell->donotstash) {
236: mat->assembled = PETSC_FALSE;
237: if (roworiented) {
238: PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
239: } else {
240: PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
241: }
242: }
243: }
244: }
245: PetscFunctionReturn(PETSC_SUCCESS);
246: }
248: static PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
249: {
250: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
251: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
252: PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
254: PetscFunctionBegin;
255: for (i = 0; i < m; i++) {
256: if (idxm[i] < 0) continue; /* negative row */
257: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
258: PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
259: row = idxm[i] - rstart;
260: for (j = 0; j < n; j++) {
261: if (idxn[j] < 0) continue; /* negative column */
262: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
263: if (idxn[j] >= cstart && idxn[j] < cend) {
264: col = idxn[j] - cstart;
265: PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j));
266: } else {
267: if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
268: #if defined(PETSC_USE_CTABLE)
269: PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col));
270: col--;
271: #else
272: col = sell->colmap[idxn[j]] - 1;
273: #endif
274: if (col < 0 || sell->garray[col] != idxn[j]) *(v + i * n + j) = 0.0;
275: else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j));
276: }
277: }
278: }
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: static PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode)
283: {
284: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
285: PetscInt nstash, reallocs;
287: PetscFunctionBegin;
288: if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
290: PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
291: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
292: PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode)
297: {
298: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
299: PetscMPIInt n;
300: PetscInt i, flg;
301: PetscInt *row, *col;
302: PetscScalar *val;
303: PetscBool all_assembled;
304: /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */
305: PetscFunctionBegin;
306: if (!sell->donotstash && !mat->nooffprocentries) {
307: while (1) {
308: PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
309: if (!flg) break;
311: for (i = 0; i < n; i++) { /* assemble one by one */
312: PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode));
313: }
314: }
315: PetscCall(MatStashScatterEnd_Private(&mat->stash));
316: }
317: #if defined(PETSC_HAVE_CUDA)
318: if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU;
319: #endif
320: PetscCall(MatAssemblyBegin(sell->A, mode));
321: PetscCall(MatAssemblyEnd(sell->A, mode));
323: /*
324: determine if any process has disassembled, if so we must
325: also disassemble ourselves, in order that we may reassemble.
326: */
327: /*
328: if nonzero structure of submatrix B cannot change then we know that
329: no process disassembled thus we can skip this stuff
330: */
331: if (!((Mat_SeqSELL *)sell->B->data)->nonew) {
332: PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
333: if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISELL(mat));
334: }
335: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat));
336: #if defined(PETSC_HAVE_CUDA)
337: if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU;
338: #endif
339: PetscCall(MatAssemblyBegin(sell->B, mode));
340: PetscCall(MatAssemblyEnd(sell->B, mode));
341: PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
342: sell->rowvalues = NULL;
343: PetscCall(VecDestroy(&sell->diag));
345: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
346: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)sell->A->data)->nonew) {
347: PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate;
348: PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
349: }
350: #if defined(PETSC_HAVE_CUDA)
351: mat->offloadmask = PETSC_OFFLOAD_BOTH;
352: #endif
353: PetscFunctionReturn(PETSC_SUCCESS);
354: }
356: static PetscErrorCode MatZeroEntries_MPISELL(Mat A)
357: {
358: Mat_MPISELL *l = (Mat_MPISELL *)A->data;
360: PetscFunctionBegin;
361: PetscCall(MatZeroEntries(l->A));
362: PetscCall(MatZeroEntries(l->B));
363: PetscFunctionReturn(PETSC_SUCCESS);
364: }
366: static PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy)
367: {
368: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
369: PetscInt nt;
371: PetscFunctionBegin;
372: PetscCall(VecGetLocalSize(xx, &nt));
373: PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
374: PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
375: PetscCall((*a->A->ops->mult)(a->A, xx, yy));
376: PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
377: PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
378: PetscFunctionReturn(PETSC_SUCCESS);
379: }
381: static PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx)
382: {
383: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
385: PetscFunctionBegin;
386: PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: static PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
391: {
392: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
394: PetscFunctionBegin;
395: PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
396: PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
397: PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
398: PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
399: PetscFunctionReturn(PETSC_SUCCESS);
400: }
402: static PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy)
403: {
404: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
406: PetscFunctionBegin;
407: /* do nondiagonal part */
408: PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
409: /* do local part */
410: PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
411: /* add partial results together */
412: PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
413: PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
414: PetscFunctionReturn(PETSC_SUCCESS);
415: }
417: static PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
418: {
419: MPI_Comm comm;
420: Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell;
421: Mat Adia = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
422: IS Me, Notme;
423: PetscInt M, N, first, last, *notme, i;
424: PetscMPIInt size;
426: PetscFunctionBegin;
427: /* Easy test: symmetric diagonal block */
428: Bsell = (Mat_MPISELL *)Bmat->data;
429: Bdia = Bsell->A;
430: PetscCall(MatIsTranspose(Adia, Bdia, tol, f));
431: if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
432: PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
433: PetscCallMPI(MPI_Comm_size(comm, &size));
434: if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
436: /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
437: PetscCall(MatGetSize(Amat, &M, &N));
438: PetscCall(MatGetOwnershipRange(Amat, &first, &last));
439: PetscCall(PetscMalloc1(N - last + first, ¬me));
440: for (i = 0; i < first; i++) notme[i] = i;
441: for (i = last; i < M; i++) notme[i - last + first] = i;
442: PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
443: PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
444: PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
445: Aoff = Aoffs[0];
446: PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
447: Boff = Boffs[0];
448: PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
449: PetscCall(MatDestroyMatrices(1, &Aoffs));
450: PetscCall(MatDestroyMatrices(1, &Boffs));
451: PetscCall(ISDestroy(&Me));
452: PetscCall(ISDestroy(&Notme));
453: PetscCall(PetscFree(notme));
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: static PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
458: {
459: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
461: PetscFunctionBegin;
462: /* do nondiagonal part */
463: PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
464: /* do local part */
465: PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
466: /* add partial results together */
467: PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
468: PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
472: /*
473: This only works correctly for square matrices where the subblock A->A is the
474: diagonal block
475: */
476: static PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v)
477: {
478: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
480: PetscFunctionBegin;
481: PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
482: PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
483: PetscCall(MatGetDiagonal(a->A, v));
484: PetscFunctionReturn(PETSC_SUCCESS);
485: }
487: static PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa)
488: {
489: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
491: PetscFunctionBegin;
492: PetscCall(MatScale(a->A, aa));
493: PetscCall(MatScale(a->B, aa));
494: PetscFunctionReturn(PETSC_SUCCESS);
495: }
497: PetscErrorCode MatDestroy_MPISELL(Mat mat)
498: {
499: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
501: PetscFunctionBegin;
502: PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
503: PetscCall(MatStashDestroy_Private(&mat->stash));
504: PetscCall(VecDestroy(&sell->diag));
505: PetscCall(MatDestroy(&sell->A));
506: PetscCall(MatDestroy(&sell->B));
507: #if defined(PETSC_USE_CTABLE)
508: PetscCall(PetscHMapIDestroy(&sell->colmap));
509: #else
510: PetscCall(PetscFree(sell->colmap));
511: #endif
512: PetscCall(PetscFree(sell->garray));
513: PetscCall(VecDestroy(&sell->lvec));
514: PetscCall(VecScatterDestroy(&sell->Mvctx));
515: PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
516: PetscCall(PetscFree(sell->ld));
517: PetscCall(PetscFree(mat->data));
519: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
520: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
521: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
522: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
523: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL));
524: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL));
525: #if defined(PETSC_HAVE_CUDA)
526: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL));
527: #endif
528: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
529: PetscFunctionReturn(PETSC_SUCCESS);
530: }
532: #include <petscdraw.h>
533: static PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
534: {
535: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
536: PetscMPIInt rank = sell->rank, size = sell->size;
537: PetscBool isdraw, isascii, isbinary;
538: PetscViewer sviewer;
539: PetscViewerFormat format;
541: PetscFunctionBegin;
542: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
543: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
544: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
545: if (isascii) {
546: PetscCall(PetscViewerGetFormat(viewer, &format));
547: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
548: MatInfo info;
549: PetscInt *inodes;
551: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
552: PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
553: PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL));
554: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
555: if (!inodes) {
556: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
557: (PetscInt)info.nz_allocated, (PetscInt)info.memory));
558: } else {
559: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
560: (PetscInt)info.nz_allocated, (PetscInt)info.memory));
561: }
562: PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info));
563: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
564: PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info));
565: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
566: PetscCall(PetscViewerFlush(viewer));
567: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
568: PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
569: PetscCall(VecScatterView(sell->Mvctx, viewer));
570: PetscFunctionReturn(PETSC_SUCCESS);
571: } else if (format == PETSC_VIEWER_ASCII_INFO) {
572: PetscInt inodecount, inodelimit, *inodes;
573: PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit));
574: if (inodes) {
575: PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
576: } else {
577: PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
578: }
579: PetscFunctionReturn(PETSC_SUCCESS);
580: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
581: PetscFunctionReturn(PETSC_SUCCESS);
582: }
583: } else if (isbinary) {
584: if (size == 1) {
585: PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name));
586: PetscCall(MatView(sell->A, viewer));
587: } else {
588: /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */
589: }
590: PetscFunctionReturn(PETSC_SUCCESS);
591: } else if (isdraw) {
592: PetscDraw draw;
593: PetscBool isnull;
594: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
595: PetscCall(PetscDrawIsNull(draw, &isnull));
596: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
597: }
599: {
600: /* assemble the entire matrix onto first processor. */
601: Mat A;
602: Mat_SeqSELL *Aloc;
603: PetscInt M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j;
604: MatScalar *aval;
605: PetscBool isnonzero;
607: PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
608: if (rank == 0) {
609: PetscCall(MatSetSizes(A, M, N, M, N));
610: } else {
611: PetscCall(MatSetSizes(A, 0, 0, M, N));
612: }
613: /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */
614: PetscCall(MatSetType(A, MATMPISELL));
615: PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL));
616: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
618: /* copy over the A part */
619: Aloc = (Mat_SeqSELL *)sell->A->data;
620: acolidx = Aloc->colidx;
621: aval = Aloc->val;
622: for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */
623: for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
624: isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
625: if (isnonzero) { /* check the mask bit */
626: row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
627: col = *acolidx + mat->rmap->rstart;
628: PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
629: }
630: aval++;
631: acolidx++;
632: }
633: }
635: /* copy over the B part */
636: Aloc = (Mat_SeqSELL *)sell->B->data;
637: acolidx = Aloc->colidx;
638: aval = Aloc->val;
639: for (i = 0; i < Aloc->totalslices; i++) {
640: for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
641: isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
642: if (isnonzero) {
643: row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
644: col = sell->garray[*acolidx];
645: PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
646: }
647: aval++;
648: acolidx++;
649: }
650: }
652: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
653: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
654: /*
655: Everyone has to call to draw the matrix since the graphics waits are
656: synchronized across all processors that share the PetscDraw object
657: */
658: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
659: if (rank == 0) {
660: PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)A->data)->A, ((PetscObject)mat)->name));
661: PetscCall(MatView_SeqSELL(((Mat_MPISELL *)A->data)->A, sviewer));
662: }
663: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
664: PetscCall(MatDestroy(&A));
665: }
666: PetscFunctionReturn(PETSC_SUCCESS);
667: }
669: static PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer)
670: {
671: PetscBool isascii, isdraw, issocket, isbinary;
673: PetscFunctionBegin;
674: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
675: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
676: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
677: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
678: if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer));
679: PetscFunctionReturn(PETSC_SUCCESS);
680: }
682: static PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
683: {
684: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
686: PetscFunctionBegin;
687: PetscCall(MatGetSize(sell->B, NULL, nghosts));
688: if (ghosts) *ghosts = sell->garray;
689: PetscFunctionReturn(PETSC_SUCCESS);
690: }
692: static PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info)
693: {
694: Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
695: Mat A = mat->A, B = mat->B;
696: PetscLogDouble isend[5], irecv[5];
698: PetscFunctionBegin;
699: info->block_size = 1.0;
700: PetscCall(MatGetInfo(A, MAT_LOCAL, info));
702: isend[0] = info->nz_used;
703: isend[1] = info->nz_allocated;
704: isend[2] = info->nz_unneeded;
705: isend[3] = info->memory;
706: isend[4] = info->mallocs;
708: PetscCall(MatGetInfo(B, MAT_LOCAL, info));
710: isend[0] += info->nz_used;
711: isend[1] += info->nz_allocated;
712: isend[2] += info->nz_unneeded;
713: isend[3] += info->memory;
714: isend[4] += info->mallocs;
715: if (flag == MAT_LOCAL) {
716: info->nz_used = isend[0];
717: info->nz_allocated = isend[1];
718: info->nz_unneeded = isend[2];
719: info->memory = isend[3];
720: info->mallocs = isend[4];
721: } else if (flag == MAT_GLOBAL_MAX) {
722: PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
724: info->nz_used = irecv[0];
725: info->nz_allocated = irecv[1];
726: info->nz_unneeded = irecv[2];
727: info->memory = irecv[3];
728: info->mallocs = irecv[4];
729: } else if (flag == MAT_GLOBAL_SUM) {
730: PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
732: info->nz_used = irecv[0];
733: info->nz_allocated = irecv[1];
734: info->nz_unneeded = irecv[2];
735: info->memory = irecv[3];
736: info->mallocs = irecv[4];
737: }
738: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
739: info->fill_ratio_needed = 0;
740: info->factor_mallocs = 0;
741: PetscFunctionReturn(PETSC_SUCCESS);
742: }
744: static PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg)
745: {
746: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
748: PetscFunctionBegin;
749: switch (op) {
750: case MAT_NEW_NONZERO_LOCATIONS:
751: case MAT_NEW_NONZERO_ALLOCATION_ERR:
752: case MAT_UNUSED_NONZERO_LOCATION_ERR:
753: case MAT_KEEP_NONZERO_PATTERN:
754: case MAT_NEW_NONZERO_LOCATION_ERR:
755: case MAT_USE_INODES:
756: case MAT_IGNORE_ZERO_ENTRIES:
757: MatCheckPreallocated(A, 1);
758: PetscCall(MatSetOption(a->A, op, flg));
759: PetscCall(MatSetOption(a->B, op, flg));
760: break;
761: case MAT_ROW_ORIENTED:
762: MatCheckPreallocated(A, 1);
763: a->roworiented = flg;
765: PetscCall(MatSetOption(a->A, op, flg));
766: PetscCall(MatSetOption(a->B, op, flg));
767: break;
768: case MAT_IGNORE_OFF_PROC_ENTRIES:
769: a->donotstash = flg;
770: break;
771: case MAT_SYMMETRIC:
772: MatCheckPreallocated(A, 1);
773: PetscCall(MatSetOption(a->A, op, flg));
774: break;
775: case MAT_STRUCTURALLY_SYMMETRIC:
776: MatCheckPreallocated(A, 1);
777: PetscCall(MatSetOption(a->A, op, flg));
778: break;
779: case MAT_HERMITIAN:
780: MatCheckPreallocated(A, 1);
781: PetscCall(MatSetOption(a->A, op, flg));
782: break;
783: case MAT_SYMMETRY_ETERNAL:
784: MatCheckPreallocated(A, 1);
785: PetscCall(MatSetOption(a->A, op, flg));
786: break;
787: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
788: MatCheckPreallocated(A, 1);
789: PetscCall(MatSetOption(a->A, op, flg));
790: break;
791: default:
792: break;
793: }
794: PetscFunctionReturn(PETSC_SUCCESS);
795: }
797: static PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr)
798: {
799: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
800: Mat a = sell->A, b = sell->B;
801: PetscInt s1, s2, s3;
803: PetscFunctionBegin;
804: PetscCall(MatGetLocalSize(mat, &s2, &s3));
805: if (rr) {
806: PetscCall(VecGetLocalSize(rr, &s1));
807: PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
808: /* Overlap communication with computation. */
809: PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
810: }
811: if (ll) {
812: PetscCall(VecGetLocalSize(ll, &s1));
813: PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
814: PetscUseTypeMethod(b, diagonalscale, ll, NULL);
815: }
816: /* scale the diagonal block */
817: PetscUseTypeMethod(a, diagonalscale, ll, rr);
819: if (rr) {
820: /* Do a scatter end and then right scale the off-diagonal block */
821: PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
822: PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec);
823: }
824: PetscFunctionReturn(PETSC_SUCCESS);
825: }
827: static PetscErrorCode MatSetUnfactored_MPISELL(Mat A)
828: {
829: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
831: PetscFunctionBegin;
832: PetscCall(MatSetUnfactored(a->A));
833: PetscFunctionReturn(PETSC_SUCCESS);
834: }
836: static PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag)
837: {
838: Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data;
839: Mat a, b, c, d;
840: PetscBool flg;
842: PetscFunctionBegin;
843: a = matA->A;
844: b = matA->B;
845: c = matB->A;
846: d = matB->B;
848: PetscCall(MatEqual(a, c, &flg));
849: if (flg) PetscCall(MatEqual(b, d, &flg));
850: PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
851: PetscFunctionReturn(PETSC_SUCCESS);
852: }
854: static PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str)
855: {
856: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
857: Mat_MPISELL *b = (Mat_MPISELL *)B->data;
859: PetscFunctionBegin;
860: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
861: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
862: /* because of the column compression in the off-processor part of the matrix a->B,
863: the number of columns in a->B and b->B may be different, hence we cannot call
864: the MatCopy() directly on the two parts. If need be, we can provide a more
865: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
866: then copying the submatrices */
867: PetscCall(MatCopy_Basic(A, B, str));
868: } else {
869: PetscCall(MatCopy(a->A, b->A, str));
870: PetscCall(MatCopy(a->B, b->B, str));
871: }
872: PetscFunctionReturn(PETSC_SUCCESS);
873: }
875: static PetscErrorCode MatSetUp_MPISELL(Mat A)
876: {
877: PetscFunctionBegin;
878: PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
879: PetscFunctionReturn(PETSC_SUCCESS);
880: }
882: static PetscErrorCode MatConjugate_MPISELL(Mat mat)
883: {
884: PetscFunctionBegin;
885: if (PetscDefined(USE_COMPLEX)) {
886: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
888: PetscCall(MatConjugate_SeqSELL(sell->A));
889: PetscCall(MatConjugate_SeqSELL(sell->B));
890: }
891: PetscFunctionReturn(PETSC_SUCCESS);
892: }
894: static PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values)
895: {
896: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
898: PetscFunctionBegin;
899: PetscCall(MatInvertBlockDiagonal(a->A, values));
900: A->factorerrortype = a->A->factorerrortype;
901: PetscFunctionReturn(PETSC_SUCCESS);
902: }
904: static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx)
905: {
906: Mat_MPISELL *sell = (Mat_MPISELL *)x->data;
908: PetscFunctionBegin;
909: PetscCall(MatSetRandom(sell->A, rctx));
910: PetscCall(MatSetRandom(sell->B, rctx));
911: PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
912: PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
913: PetscFunctionReturn(PETSC_SUCCESS);
914: }
916: static PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems PetscOptionsObject)
917: {
918: PetscFunctionBegin;
919: PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options");
920: PetscOptionsHeadEnd();
921: PetscFunctionReturn(PETSC_SUCCESS);
922: }
924: static PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a)
925: {
926: Mat_MPISELL *msell = (Mat_MPISELL *)Y->data;
927: Mat_SeqSELL *sell = (Mat_SeqSELL *)msell->A->data;
929: PetscFunctionBegin;
930: if (!Y->preallocated) {
931: PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL));
932: } else if (!sell->nz) {
933: PetscInt nonew = sell->nonew;
934: PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL));
935: sell->nonew = nonew;
936: }
937: PetscCall(MatShift_Basic(Y, a));
938: PetscFunctionReturn(PETSC_SUCCESS);
939: }
941: static PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a)
942: {
943: PetscFunctionBegin;
944: *a = ((Mat_MPISELL *)A->data)->A;
945: PetscFunctionReturn(PETSC_SUCCESS);
946: }
948: static PetscErrorCode MatStoreValues_MPISELL(Mat mat)
949: {
950: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
952: PetscFunctionBegin;
953: PetscCall(MatStoreValues(sell->A));
954: PetscCall(MatStoreValues(sell->B));
955: PetscFunctionReturn(PETSC_SUCCESS);
956: }
958: static PetscErrorCode MatRetrieveValues_MPISELL(Mat mat)
959: {
960: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
962: PetscFunctionBegin;
963: PetscCall(MatRetrieveValues(sell->A));
964: PetscCall(MatRetrieveValues(sell->B));
965: PetscFunctionReturn(PETSC_SUCCESS);
966: }
968: static PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
969: {
970: Mat_MPISELL *b;
972: PetscFunctionBegin;
973: PetscCall(PetscLayoutSetUp(B->rmap));
974: PetscCall(PetscLayoutSetUp(B->cmap));
975: b = (Mat_MPISELL *)B->data;
977: if (!B->preallocated) {
978: /* Explicitly create 2 MATSEQSELL matrices. */
979: PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
980: PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
981: PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
982: PetscCall(MatSetType(b->A, MATSEQSELL));
983: PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
984: PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
985: PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
986: PetscCall(MatSetType(b->B, MATSEQSELL));
987: }
989: PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
990: PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
991: B->preallocated = PETSC_TRUE;
992: B->was_assembled = PETSC_FALSE;
993: /*
994: critical for MatAssemblyEnd to work.
995: MatAssemblyBegin checks it to set up was_assembled
996: and MatAssemblyEnd checks was_assembled to determine whether to build garray
997: */
998: B->assembled = PETSC_FALSE;
999: PetscFunctionReturn(PETSC_SUCCESS);
1000: }
1002: static PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
1003: {
1004: Mat mat;
1005: Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data;
1007: PetscFunctionBegin;
1008: *newmat = NULL;
1009: PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
1010: PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
1011: PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
1012: PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
1013: a = (Mat_MPISELL *)mat->data;
1015: mat->factortype = matin->factortype;
1016: mat->assembled = PETSC_TRUE;
1017: mat->insertmode = NOT_SET_VALUES;
1018: mat->preallocated = PETSC_TRUE;
1020: a->size = oldmat->size;
1021: a->rank = oldmat->rank;
1022: a->donotstash = oldmat->donotstash;
1023: a->roworiented = oldmat->roworiented;
1024: a->rowindices = NULL;
1025: a->rowvalues = NULL;
1026: a->getrowactive = PETSC_FALSE;
1028: PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
1029: PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
1031: if (oldmat->colmap) {
1032: #if defined(PETSC_USE_CTABLE)
1033: PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
1034: #else
1035: PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
1036: PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
1037: #endif
1038: } else a->colmap = NULL;
1039: if (oldmat->garray) {
1040: PetscInt len;
1041: len = oldmat->B->cmap->n;
1042: PetscCall(PetscMalloc1(len + 1, &a->garray));
1043: if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
1044: } else a->garray = NULL;
1046: PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
1047: PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
1048: PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1049: PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
1050: PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
1051: *newmat = mat;
1052: PetscFunctionReturn(PETSC_SUCCESS);
1053: }
1055: static const struct _MatOps MatOps_Values = {MatSetValues_MPISELL,
1056: NULL,
1057: NULL,
1058: MatMult_MPISELL,
1059: /* 4*/ MatMultAdd_MPISELL,
1060: MatMultTranspose_MPISELL,
1061: MatMultTransposeAdd_MPISELL,
1062: NULL,
1063: NULL,
1064: NULL,
1065: /*10*/ NULL,
1066: NULL,
1067: NULL,
1068: MatSOR_MPISELL,
1069: NULL,
1070: /*15*/ MatGetInfo_MPISELL,
1071: MatEqual_MPISELL,
1072: MatGetDiagonal_MPISELL,
1073: MatDiagonalScale_MPISELL,
1074: NULL,
1075: /*20*/ MatAssemblyBegin_MPISELL,
1076: MatAssemblyEnd_MPISELL,
1077: MatSetOption_MPISELL,
1078: MatZeroEntries_MPISELL,
1079: /*24*/ NULL,
1080: NULL,
1081: NULL,
1082: NULL,
1083: NULL,
1084: /*29*/ MatSetUp_MPISELL,
1085: NULL,
1086: NULL,
1087: MatGetDiagonalBlock_MPISELL,
1088: NULL,
1089: /*34*/ MatDuplicate_MPISELL,
1090: NULL,
1091: NULL,
1092: NULL,
1093: NULL,
1094: /*39*/ NULL,
1095: NULL,
1096: NULL,
1097: MatGetValues_MPISELL,
1098: MatCopy_MPISELL,
1099: /*44*/ NULL,
1100: MatScale_MPISELL,
1101: MatShift_MPISELL,
1102: MatDiagonalSet_MPISELL,
1103: NULL,
1104: /*49*/ MatSetRandom_MPISELL,
1105: NULL,
1106: NULL,
1107: NULL,
1108: NULL,
1109: /*54*/ MatFDColoringCreate_MPIXAIJ,
1110: NULL,
1111: MatSetUnfactored_MPISELL,
1112: NULL,
1113: NULL,
1114: /*59*/ NULL,
1115: MatDestroy_MPISELL,
1116: MatView_MPISELL,
1117: NULL,
1118: NULL,
1119: /*64*/ NULL,
1120: NULL,
1121: NULL,
1122: NULL,
1123: NULL,
1124: /*69*/ NULL,
1125: NULL,
1126: NULL,
1127: MatFDColoringApply_AIJ, /* reuse AIJ function */
1128: MatSetFromOptions_MPISELL,
1129: NULL,
1130: /*75*/ NULL,
1131: NULL,
1132: NULL,
1133: NULL,
1134: NULL,
1135: /*80*/ NULL,
1136: NULL,
1137: NULL,
1138: /*83*/ NULL,
1139: NULL,
1140: NULL,
1141: NULL,
1142: NULL,
1143: NULL,
1144: /*89*/ NULL,
1145: NULL,
1146: NULL,
1147: NULL,
1148: MatConjugate_MPISELL,
1149: /*94*/ NULL,
1150: NULL,
1151: NULL,
1152: NULL,
1153: NULL,
1154: /*99*/ NULL,
1155: NULL,
1156: NULL,
1157: NULL,
1158: NULL,
1159: /*104*/ NULL,
1160: NULL,
1161: MatGetGhosts_MPISELL,
1162: NULL,
1163: NULL,
1164: /*109*/ MatMultDiagonalBlock_MPISELL,
1165: NULL,
1166: NULL,
1167: NULL,
1168: NULL,
1169: /*114*/ NULL,
1170: NULL,
1171: MatInvertBlockDiagonal_MPISELL,
1172: NULL,
1173: /*119*/ NULL,
1174: NULL,
1175: NULL,
1176: NULL,
1177: NULL,
1178: /*124*/ NULL,
1179: NULL,
1180: NULL,
1181: NULL,
1182: NULL,
1183: /*129*/ MatFDColoringSetUp_MPIXAIJ,
1184: NULL,
1185: NULL,
1186: NULL,
1187: NULL,
1188: /*134*/ NULL,
1189: NULL,
1190: NULL,
1191: NULL,
1192: NULL,
1193: /*139*/ NULL,
1194: NULL,
1195: NULL,
1196: NULL,
1197: NULL,
1198: NULL};
1200: /*@C
1201: MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format.
1202: For good matrix assembly performance the user should preallocate the matrix storage by
1203: setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
1205: Collective
1207: Input Parameters:
1208: + B - the matrix
1209: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
1210: (same value is used for all local rows)
1211: . d_nnz - array containing the number of nonzeros in the various rows of the
1212: DIAGONAL portion of the local submatrix (possibly different for each row)
1213: or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
1214: The size of this array is equal to the number of local rows, i.e 'm'.
1215: For matrices that will be factored, you must leave room for (and set)
1216: the diagonal entry even if it is zero.
1217: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
1218: submatrix (same value is used for all local rows).
1219: - o_nnz - array containing the number of nonzeros in the various rows of the
1220: OFF-DIAGONAL portion of the local submatrix (possibly different for
1221: each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
1222: structure. The size of this array is equal to the number
1223: of local rows, i.e 'm'.
1225: Example usage:
1226: Consider the following 8x8 matrix with 34 non-zero values, that is
1227: assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1228: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1229: as follows
1231: .vb
1232: 1 2 0 | 0 3 0 | 0 4
1233: Proc0 0 5 6 | 7 0 0 | 8 0
1234: 9 0 10 | 11 0 0 | 12 0
1235: -------------------------------------
1236: 13 0 14 | 15 16 17 | 0 0
1237: Proc1 0 18 0 | 19 20 21 | 0 0
1238: 0 0 0 | 22 23 0 | 24 0
1239: -------------------------------------
1240: Proc2 25 26 27 | 0 0 28 | 29 0
1241: 30 0 0 | 31 32 33 | 0 34
1242: .ve
1244: This can be represented as a collection of submatrices as
1246: .vb
1247: A B C
1248: D E F
1249: G H I
1250: .ve
1252: Where the submatrices A,B,C are owned by proc0, D,E,F are
1253: owned by proc1, G,H,I are owned by proc2.
1255: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1256: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1257: The 'M','N' parameters are 8,8, and have the same values on all procs.
1259: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1260: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1261: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1262: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1263: part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1264: matrix, and [DF] as another SeqSELL matrix.
1266: When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
1267: allocated for every row of the local DIAGONAL submatrix, and o_nz
1268: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1269: One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
1270: the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1271: In this case, the values of d_nz,o_nz are
1272: .vb
1273: proc0 dnz = 2, o_nz = 2
1274: proc1 dnz = 3, o_nz = 2
1275: proc2 dnz = 1, o_nz = 4
1276: .ve
1277: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1278: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1279: for proc3. i.e we are using 12+15+10=37 storage locations to store
1280: 34 values.
1282: When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
1283: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1284: In the above case the values for d_nnz,o_nnz are
1285: .vb
1286: proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
1287: proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
1288: proc2 d_nnz = [1,1] and o_nnz = [4,4]
1289: .ve
1290: Here the space allocated is according to nz (or maximum values in the nnz
1291: if nnz is provided) for DIAGONAL and OFF-DIAGONAL submatrices, i.e (2+2+3+2)*3+(1+4)*2=37
1293: Level: intermediate
1295: Notes:
1296: If the *_nnz parameter is given then the *_nz parameter is ignored
1298: The stored row and column indices begin with zero.
1300: The parallel matrix is partitioned such that the first m0 rows belong to
1301: process 0, the next m1 rows belong to process 1, the next m2 rows belong
1302: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1304: The DIAGONAL portion of the local submatrix of a processor can be defined
1305: as the submatrix which is obtained by extraction the part corresponding to
1306: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
1307: first row that belongs to the processor, r2 is the last row belonging to
1308: the this processor, and c1-c2 is range of indices of the local part of a
1309: vector suitable for applying the matrix to. This is an mxn matrix. In the
1310: common case of a square matrix, the row and column ranges are the same and
1311: the DIAGONAL part is also square. The remaining portion of the local
1312: submatrix (mxN) constitute the OFF-DIAGONAL portion.
1314: If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
1316: You can call `MatGetInfo()` to get information on how effective the preallocation was;
1317: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1318: You can also run with the option -info and look for messages with the string
1319: malloc in them to see if additional memory allocation was needed.
1321: .seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`,
1322: `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL`
1323: @*/
1324: PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
1325: {
1326: PetscFunctionBegin;
1329: PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
1330: PetscFunctionReturn(PETSC_SUCCESS);
1331: }
1333: /*MC
1334: MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices,
1335: based on the sliced Ellpack format
1337: Options Database Key:
1338: . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`
1340: Level: beginner
1342: .seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
1343: M*/
1345: /*@C
1346: MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format.
1348: Collective
1350: Input Parameters:
1351: + comm - MPI communicator
1352: . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
1353: This value should be the same as the local size used in creating the
1354: y vector for the matrix-vector product y = Ax.
1355: . n - This value should be the same as the local size used in creating the
1356: x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
1357: calculated if `N` is given) For square matrices n is almost always `m`.
1358: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1359: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1360: . d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix
1361: (same value is used for all local rows)
1362: . d_rlen - array containing the number of nonzeros in the various rows of the
1363: DIAGONAL portion of the local submatrix (possibly different for each row)
1364: or `NULL`, if d_rlenmax is used to specify the nonzero structure.
1365: The size of this array is equal to the number of local rows, i.e `m`.
1366: . o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local
1367: submatrix (same value is used for all local rows).
1368: - o_rlen - array containing the number of nonzeros in the various rows of the
1369: OFF-DIAGONAL portion of the local submatrix (possibly different for
1370: each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero
1371: structure. The size of this array is equal to the number
1372: of local rows, i.e `m`.
1374: Output Parameter:
1375: . A - the matrix
1377: Options Database Key:
1378: . -mat_sell_oneindex - Internally use indexing starting at 1
1379: rather than 0. When calling `MatSetValues()`,
1380: the user still MUST index entries starting at 0!
1382: Example:
1383: Consider the following 8x8 matrix with 34 non-zero values, that is
1384: assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1385: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1386: as follows
1388: .vb
1389: 1 2 0 | 0 3 0 | 0 4
1390: Proc0 0 5 6 | 7 0 0 | 8 0
1391: 9 0 10 | 11 0 0 | 12 0
1392: -------------------------------------
1393: 13 0 14 | 15 16 17 | 0 0
1394: Proc1 0 18 0 | 19 20 21 | 0 0
1395: 0 0 0 | 22 23 0 | 24 0
1396: -------------------------------------
1397: Proc2 25 26 27 | 0 0 28 | 29 0
1398: 30 0 0 | 31 32 33 | 0 34
1399: .ve
1401: This can be represented as a collection of submatrices as
1402: .vb
1403: A B C
1404: D E F
1405: G H I
1406: .ve
1408: Where the submatrices A,B,C are owned by proc0, D,E,F are
1409: owned by proc1, G,H,I are owned by proc2.
1411: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1412: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1413: The 'M','N' parameters are 8,8, and have the same values on all procs.
1415: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1416: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1417: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1418: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1419: part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1420: matrix, and [DF] as another `MATSEQSELL` matrix.
1422: When d_rlenmax, o_rlenmax parameters are specified, d_rlenmax storage elements are
1423: allocated for every row of the local DIAGONAL submatrix, and o_rlenmax
1424: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1425: One way to choose `d_rlenmax` and `o_rlenmax` is to use the maximum number of nonzeros over
1426: the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1427: In this case, the values of d_rlenmax,o_rlenmax are
1428: .vb
1429: proc0 - d_rlenmax = 2, o_rlenmax = 2
1430: proc1 - d_rlenmax = 3, o_rlenmax = 2
1431: proc2 - d_rlenmax = 1, o_rlenmax = 4
1432: .ve
1433: We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This
1434: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1435: for proc3. i.e we are using 12+15+10=37 storage locations to store
1436: 34 values.
1438: When `d_rlen`, `o_rlen` parameters are specified, the storage is specified
1439: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1440: In the above case the values for `d_nnz`, `o_nnz` are
1441: .vb
1442: proc0 - d_nnz = [2,2,2] and o_nnz = [2,2,2]
1443: proc1 - d_nnz = [3,3,2] and o_nnz = [2,1,1]
1444: proc2 - d_nnz = [1,1] and o_nnz = [4,4]
1445: .ve
1446: Here the space allocated is still 37 though there are 34 nonzeros because
1447: the allocation is always done according to rlenmax.
1449: Level: intermediate
1451: Notes:
1452: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1453: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1454: [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]
1456: If the *_rlen parameter is given then the *_rlenmax parameter is ignored
1458: `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
1459: processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate
1460: storage requirements for this matrix.
1462: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
1463: processor than it must be used on all processors that share the object for
1464: that argument.
1466: The user MUST specify either the local or global matrix dimensions
1467: (possibly both).
1469: The parallel matrix is partitioned across processors such that the
1470: first m0 rows belong to process 0, the next m1 rows belong to
1471: process 1, the next m2 rows belong to process 2 etc.. where
1472: m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
1473: values corresponding to [`m` x `N`] submatrix.
1475: The columns are logically partitioned with the n0 columns belonging
1476: to 0th partition, the next n1 columns belonging to the next
1477: partition etc.. where n0,n1,n2... are the input parameter `n`.
1479: The DIAGONAL portion of the local submatrix on any given processor
1480: is the submatrix corresponding to the rows and columns `m`, `n`
1481: corresponding to the given processor. i.e diagonal matrix on
1482: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
1483: etc. The remaining portion of the local submatrix [m x (N-n)]
1484: constitute the OFF-DIAGONAL portion. The example below better
1485: illustrates this concept.
1487: For a square global matrix we define each processor's diagonal portion
1488: to be its local rows and the corresponding columns (a square submatrix);
1489: each processor's off-diagonal portion encompasses the remainder of the
1490: local matrix (a rectangular submatrix).
1492: If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored.
1494: When calling this routine with a single process communicator, a matrix of
1495: type `MATSEQSELL` is returned. If a matrix of type `MATMPISELL` is desired for this
1496: type of communicator, use the construction mechanism
1497: .vb
1498: MatCreate(...,&A);
1499: MatSetType(A,MATMPISELL);
1500: MatSetSizes(A, m,n,M,N);
1501: MatMPISELLSetPreallocation(A,...);
1502: .ve
1504: .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MATMPISELL`
1505: @*/
1506: PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A)
1507: {
1508: PetscMPIInt size;
1510: PetscFunctionBegin;
1511: PetscCall(MatCreate(comm, A));
1512: PetscCall(MatSetSizes(*A, m, n, M, N));
1513: PetscCallMPI(MPI_Comm_size(comm, &size));
1514: if (size > 1) {
1515: PetscCall(MatSetType(*A, MATMPISELL));
1516: PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen));
1517: } else {
1518: PetscCall(MatSetType(*A, MATSEQSELL));
1519: PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen));
1520: }
1521: PetscFunctionReturn(PETSC_SUCCESS);
1522: }
1524: /*@C
1525: MatMPISELLGetSeqSELL - Returns the local pieces of this distributed matrix
1527: Not Collective
1529: Input Parameter:
1530: . A - the `MATMPISELL` matrix
1532: Output Parameters:
1533: + Ad - The diagonal portion of `A`
1534: . Ao - The off-diagonal portion of `A`
1535: - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
1537: Level: advanced
1539: .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`
1540: @*/
1541: PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
1542: {
1543: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1544: PetscBool flg;
1546: PetscFunctionBegin;
1547: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg));
1548: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input");
1549: if (Ad) *Ad = a->A;
1550: if (Ao) *Ao = a->B;
1551: if (colmap) *colmap = a->garray;
1552: PetscFunctionReturn(PETSC_SUCCESS);
1553: }
1555: /*@C
1556: MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from an `MATMPISELL` matrix by
1557: taking all its local rows and NON-ZERO columns
1559: Not Collective
1561: Input Parameters:
1562: + A - the matrix
1563: . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
1564: . row - index sets of rows to extract (or `NULL`)
1565: - col - index sets of columns to extract (or `NULL`)
1567: Output Parameter:
1568: . A_loc - the local sequential matrix generated
1570: Level: advanced
1572: .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()`
1573: @*/
1574: PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
1575: {
1576: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1577: PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
1578: IS isrowa, iscola;
1579: Mat *aloc;
1580: PetscBool match;
1582: PetscFunctionBegin;
1583: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match));
1584: PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input");
1585: PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
1586: if (!row) {
1587: start = A->rmap->rstart;
1588: end = A->rmap->rend;
1589: PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
1590: } else {
1591: isrowa = *row;
1592: }
1593: if (!col) {
1594: start = A->cmap->rstart;
1595: cmap = a->garray;
1596: nzA = a->A->cmap->n;
1597: nzB = a->B->cmap->n;
1598: PetscCall(PetscMalloc1(nzA + nzB, &idx));
1599: ncols = 0;
1600: for (i = 0; i < nzB; i++) {
1601: if (cmap[i] < start) idx[ncols++] = cmap[i];
1602: else break;
1603: }
1604: imark = i;
1605: for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
1606: for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
1607: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
1608: } else {
1609: iscola = *col;
1610: }
1611: if (scall != MAT_INITIAL_MATRIX) {
1612: PetscCall(PetscMalloc1(1, &aloc));
1613: aloc[0] = *A_loc;
1614: }
1615: PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
1616: *A_loc = aloc[0];
1617: PetscCall(PetscFree(aloc));
1618: if (!row) PetscCall(ISDestroy(&isrowa));
1619: if (!col) PetscCall(ISDestroy(&iscola));
1620: PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
1621: PetscFunctionReturn(PETSC_SUCCESS);
1622: }
1624: #include <../src/mat/impls/aij/mpi/mpiaij.h>
1626: PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1627: {
1628: Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1629: Mat B;
1630: Mat_MPIAIJ *b;
1632: PetscFunctionBegin;
1633: PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
1635: if (reuse == MAT_REUSE_MATRIX) {
1636: B = *newmat;
1637: } else {
1638: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1639: PetscCall(MatSetType(B, MATMPIAIJ));
1640: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1641: PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
1642: PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
1643: PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
1644: }
1645: b = (Mat_MPIAIJ *)B->data;
1647: if (reuse == MAT_REUSE_MATRIX) {
1648: PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
1649: PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
1650: } else {
1651: PetscCall(MatDestroy(&b->A));
1652: PetscCall(MatDestroy(&b->B));
1653: PetscCall(MatDisAssemble_MPISELL(A));
1654: PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
1655: PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
1656: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1657: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1658: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1659: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1660: }
1662: if (reuse == MAT_INPLACE_MATRIX) {
1663: PetscCall(MatHeaderReplace(A, &B));
1664: } else {
1665: *newmat = B;
1666: }
1667: PetscFunctionReturn(PETSC_SUCCESS);
1668: }
1670: PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1671: {
1672: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1673: Mat B;
1674: Mat_MPISELL *b;
1676: PetscFunctionBegin;
1677: PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
1679: if (reuse == MAT_REUSE_MATRIX) {
1680: B = *newmat;
1681: } else {
1682: Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data;
1683: PetscInt i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n;
1684: PetscInt *d_nnz, *o_nnz;
1685: PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz));
1686: for (i = 0; i < lm; i++) {
1687: d_nnz[i] = Aa->i[i + 1] - Aa->i[i];
1688: o_nnz[i] = Ba->i[i + 1] - Ba->i[i];
1689: if (d_nnz[i] > d_nz) d_nz = d_nnz[i];
1690: if (o_nnz[i] > o_nz) o_nz = o_nnz[i];
1691: }
1692: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1693: PetscCall(MatSetType(B, MATMPISELL));
1694: PetscCall(MatSetSizes(B, lm, ln, m, n));
1695: PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
1696: PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz));
1697: PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz));
1698: PetscCall(PetscFree2(d_nnz, o_nnz));
1699: }
1700: b = (Mat_MPISELL *)B->data;
1702: if (reuse == MAT_REUSE_MATRIX) {
1703: PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A));
1704: PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B));
1705: } else {
1706: PetscCall(MatDestroy(&b->A));
1707: PetscCall(MatDestroy(&b->B));
1708: PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A));
1709: PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B));
1710: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1711: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1712: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1713: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1714: }
1716: if (reuse == MAT_INPLACE_MATRIX) {
1717: PetscCall(MatHeaderReplace(A, &B));
1718: } else {
1719: *newmat = B;
1720: }
1721: PetscFunctionReturn(PETSC_SUCCESS);
1722: }
1724: PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1725: {
1726: Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
1727: Vec bb1 = NULL;
1729: PetscFunctionBegin;
1730: if (flag == SOR_APPLY_UPPER) {
1731: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1732: PetscFunctionReturn(PETSC_SUCCESS);
1733: }
1735: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1737: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1738: if (flag & SOR_ZERO_INITIAL_GUESS) {
1739: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1740: its--;
1741: }
1743: while (its--) {
1744: PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1745: PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1747: /* update rhs: bb1 = bb - B*x */
1748: PetscCall(VecScale(mat->lvec, -1.0));
1749: PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1751: /* local sweep */
1752: PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1753: }
1754: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1755: if (flag & SOR_ZERO_INITIAL_GUESS) {
1756: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1757: its--;
1758: }
1759: while (its--) {
1760: PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1761: PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1763: /* update rhs: bb1 = bb - B*x */
1764: PetscCall(VecScale(mat->lvec, -1.0));
1765: PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1767: /* local sweep */
1768: PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1769: }
1770: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1771: if (flag & SOR_ZERO_INITIAL_GUESS) {
1772: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1773: its--;
1774: }
1775: while (its--) {
1776: PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1777: PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1779: /* update rhs: bb1 = bb - B*x */
1780: PetscCall(VecScale(mat->lvec, -1.0));
1781: PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1783: /* local sweep */
1784: PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1785: }
1786: } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1788: PetscCall(VecDestroy(&bb1));
1790: matin->factorerrortype = mat->A->factorerrortype;
1791: PetscFunctionReturn(PETSC_SUCCESS);
1792: }
1794: #if defined(PETSC_HAVE_CUDA)
1795: PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *);
1796: #endif
1798: /*MC
1799: MATMPISELL - MATMPISELL = "MPISELL" - A matrix type to be used for parallel sparse matrices.
1801: Options Database Keys:
1802: . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1804: Level: beginner
1806: .seealso: `Mat`, `MATSELL`, `MATSEQSELL` `MatCreateSELL()`
1807: M*/
1808: PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B)
1809: {
1810: Mat_MPISELL *b;
1811: PetscMPIInt size;
1813: PetscFunctionBegin;
1814: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1815: PetscCall(PetscNew(&b));
1816: B->data = (void *)b;
1817: B->ops[0] = MatOps_Values;
1818: B->assembled = PETSC_FALSE;
1819: B->insertmode = NOT_SET_VALUES;
1820: b->size = size;
1821: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
1822: /* build cache for off array entries formed */
1823: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
1825: b->donotstash = PETSC_FALSE;
1826: b->colmap = NULL;
1827: b->garray = NULL;
1828: b->roworiented = PETSC_TRUE;
1830: /* stuff used for matrix vector multiply */
1831: b->lvec = NULL;
1832: b->Mvctx = NULL;
1834: /* stuff for MatGetRow() */
1835: b->rowindices = NULL;
1836: b->rowvalues = NULL;
1837: b->getrowactive = PETSC_FALSE;
1839: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL));
1840: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL));
1841: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL));
1842: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL));
1843: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
1844: #if defined(PETSC_HAVE_CUDA)
1845: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA));
1846: #endif
1847: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL));
1848: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL));
1849: PetscFunctionReturn(PETSC_SUCCESS);
1850: }