Actual source code: mpisell.c

  1: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  2: #include <../src/mat/impls/sell/mpi/mpisell.h>
  3: #include <petsc/private/vecimpl.h>
  4: #include <petsc/private/isimpl.h>
  5: #include <petscblaslapack.h>
  6: #include <petscsf.h>

  8: /*MC
  9:    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.

 11:    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
 12:    and `MATMPISELL` otherwise.  As a result, for single process communicators,
 13:   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
 14:   for communicators controlling multiple processes.  It is recommended that you call both of
 15:   the above preallocation routines for simplicity.

 17:    Options Database Keys:
 18: . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`

 20:   Level: beginner

 22: .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL`
 23: M*/
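/*
   A minimal usage sketch (illustration only, not part of mpisell.c): create a MATSELL
   matrix and, as the manual page above recommends, call both preallocation routines so
   that the same code works on one process and on many. The name ExampleCreateSELL and
   the row lengths 5 and 2 are arbitrary choices made for this illustration.
*/
static PetscErrorCode ExampleCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(*A, MATSELL)); /* MATSEQSELL on one process, MATMPISELL otherwise */
  PetscCall(MatSetFromOptions(*A));
  PetscCall(MatSeqSELLSetPreallocation(*A, 5, NULL));          /* takes effect on a single-process communicator */
  PetscCall(MatMPISELLSetPreallocation(*A, 5, NULL, 2, NULL)); /* takes effect on a multi-process communicator  */
  PetscFunctionReturn(PETSC_SUCCESS);
}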

 25: static PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is)
 26: {
 27:   Mat_MPISELL *sell = (Mat_MPISELL *)Y->data;

 29:   PetscFunctionBegin;
 30:   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
 31:     PetscCall(MatDiagonalSet(sell->A, D, is));
 32:   } else {
 33:     PetscCall(MatDiagonalSet_Default(Y, D, is));
 34:   }
 35:   PetscFunctionReturn(PETSC_SUCCESS);
 36: }

 38: /*
 39:   Local utility routine that creates a mapping from the global column
 40: number to the local column number in the off-diagonal part of the local
 41: storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
 42: a slightly higher hash-table lookup cost; without it, it is not scalable (each
 43: process has an order-N integer array) but access is fast. A lookup sketch follows the routine below.
 44: */
 45: PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat)
 46: {
 47:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
 48:   PetscInt     n    = sell->B->cmap->n, i;

 50:   PetscFunctionBegin;
 51:   PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray");
 52: #if defined(PETSC_USE_CTABLE)
 53:   PetscCall(PetscHMapICreateWithSize(n, &sell->colmap));
 54:   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1));
 55: #else
 56:   PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap));
 57:   for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1;
 58: #endif
 59:   PetscFunctionReturn(PETSC_SUCCESS);
 60: }
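/*
   A lookup sketch (illustration only, not a PETSc routine) showing how the colmap built
   above is queried elsewhere in this file: a global column index gcol belonging to the
   off-diagonal part is translated to the local column index of sell->B, with -1 meaning
   "not present". The +1/-1 shifts let 0 denote "absent" both in the hash table and in
   the zero-initialized array. The name ExampleColmapLookup is hypothetical.
*/
static inline PetscErrorCode ExampleColmapLookup(Mat_MPISELL *sell, PetscInt gcol, PetscInt *lcol)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIGetWithDefault(sell->colmap, gcol + 1, 0, lcol));
  (*lcol)--;
#else
  *lcol = sell->colmap[gcol] - 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}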

 62: #define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \
 63:   { \
 64:     if (col <= lastcol1) low1 = 0; \
 65:     else high1 = nrow1; \
 66:     lastcol1 = col; \
 67:     while (high1 - low1 > 5) { \
 68:       t = (low1 + high1) / 2; \
 69:       if (cp1[sliceheight * t] > col) high1 = t; \
 70:       else low1 = t; \
 71:     } \
 72:     for (_i = low1; _i < high1; _i++) { \
 73:       if (cp1[sliceheight * _i] > col) break; \
 74:       if (cp1[sliceheight * _i] == col) { \
 75:         if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \
 76:         else vp1[sliceheight * _i] = value; \
 77:         inserted = PETSC_TRUE; \
 78:         goto a_noinsert; \
 79:       } \
 80:     } \
 81:     if (value == 0.0 && ignorezeroentries) { \
 82:       low1  = 0; \
 83:       high1 = nrow1; \
 84:       goto a_noinsert; \
 85:     } \
 86:     if (nonew == 1) { \
 87:       low1  = 0; \
 88:       high1 = nrow1; \
 89:       goto a_noinsert; \
 90:     } \
 91:     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
 92:     MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \
 93:     /* shift up all the later entries in this row */ \
 94:     for (ii = nrow1 - 1; ii >= _i; ii--) { \
 95:       cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \
 96:       vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \
 97:     } \
 98:     cp1[sliceheight * _i] = col; \
 99:     vp1[sliceheight * _i] = value; \
100:     a->nz++; \
101:     nrow1++; \
102:     A->nonzerostate++; \
103:   a_noinsert:; \
104:     a->rlen[row] = nrow1; \
105:   }

107: #define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \
108:   { \
109:     if (col <= lastcol2) low2 = 0; \
110:     else high2 = nrow2; \
111:     lastcol2 = col; \
112:     while (high2 - low2 > 5) { \
113:       t = (low2 + high2) / 2; \
114:       if (cp2[sliceheight * t] > col) high2 = t; \
115:       else low2 = t; \
116:     } \
117:     for (_i = low2; _i < high2; _i++) { \
118:       if (cp2[sliceheight * _i] > col) break; \
119:       if (cp2[sliceheight * _i] == col) { \
120:         if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \
121:         else vp2[sliceheight * _i] = value; \
122:         inserted = PETSC_TRUE; \
123:         goto b_noinsert; \
124:       } \
125:     } \
126:     if (value == 0.0 && ignorezeroentries) { \
127:       low2  = 0; \
128:       high2 = nrow2; \
129:       goto b_noinsert; \
130:     } \
131:     if (nonew == 1) { \
132:       low2  = 0; \
133:       high2 = nrow2; \
134:       goto b_noinsert; \
135:     } \
136:     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
137:     MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \
138:     /* shift up all the later entries in this row */ \
139:     for (ii = nrow2 - 1; ii >= _i; ii--) { \
140:       cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \
141:       vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \
142:     } \
143:     cp2[sliceheight * _i] = col; \
144:     vp2[sliceheight * _i] = value; \
145:     b->nz++; \
146:     nrow2++; \
147:     B->nonzerostate++; \
148:   b_noinsert:; \
149:     b->rlen[row] = nrow2; \
150:   }

152: static PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
153: {
154:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
155:   PetscScalar  value;
156:   PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2;
157:   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
158:   PetscBool    roworiented = sell->roworiented;

 160:   /* Some variables required by the MatSetValue_SeqSELL_Private() macro */
161:   Mat          A                 = sell->A;
162:   Mat_SeqSELL *a                 = (Mat_SeqSELL *)A->data;
163:   PetscBool    ignorezeroentries = a->ignorezeroentries, found;
164:   Mat          B                 = sell->B;
165:   Mat_SeqSELL *b                 = (Mat_SeqSELL *)B->data;
166:   PetscInt    *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight;
167:   MatScalar   *vp1, *vp2;

169:   PetscFunctionBegin;
170:   for (i = 0; i < m; i++) {
171:     if (im[i] < 0) continue;
172:     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
173:     if (im[i] >= rstart && im[i] < rend) {
174:       row      = im[i] - rstart;
175:       lastcol1 = -1;
176:       shift1   = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
177:       cp1      = PetscSafePointerPlusOffset(a->colidx, shift1);
178:       vp1      = PetscSafePointerPlusOffset(a->val, shift1);
179:       nrow1    = a->rlen[row];
180:       low1     = 0;
181:       high1    = nrow1;
182:       lastcol2 = -1;
183:       shift2   = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
184:       cp2      = PetscSafePointerPlusOffset(b->colidx, shift2);
185:       vp2      = PetscSafePointerPlusOffset(b->val, shift2);
186:       nrow2    = b->rlen[row];
187:       low2     = 0;
188:       high2    = nrow2;

190:       for (j = 0; j < n; j++) {
191:         if (roworiented) value = v[i * n + j];
192:         else value = v[i + j * m];
193:         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
194:         if (in[j] >= cstart && in[j] < cend) {
195:           col = in[j] - cstart;
196:           MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */
197: #if defined(PETSC_HAVE_CUDA)
198:           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU;
199: #endif
200:         } else if (in[j] < 0) {
201:           continue;
202:         } else {
203:           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
204:           if (mat->was_assembled) {
205:             if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
206: #if defined(PETSC_USE_CTABLE)
207:             PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col));
208:             col--;
209: #else
210:             col = sell->colmap[in[j]] - 1;
211: #endif
212:             if (col < 0 && !((Mat_SeqSELL *)sell->B->data)->nonew) {
213:               PetscCall(MatDisAssemble_MPISELL(mat));
214:               col = in[j];
215:               /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */
216:               B      = sell->B;
217:               b      = (Mat_SeqSELL *)B->data;
218:               shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
219:               cp2    = b->colidx + shift2;
220:               vp2    = b->val + shift2;
221:               nrow2  = b->rlen[row];
222:               low2   = 0;
223:               high2  = nrow2;
224:               found  = PETSC_FALSE;
225:             } else {
226:               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
227:             }
228:           } else col = in[j];
229:           MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */
230: #if defined(PETSC_HAVE_CUDA)
231:           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU;
232: #endif
233:         }
234:       }
235:     } else {
236:       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
237:       if (!sell->donotstash) {
238:         mat->assembled = PETSC_FALSE;
239:         if (roworiented) {
240:           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
241:         } else {
242:           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
243:         }
244:       }
245:     }
246:   }
247:   PetscFunctionReturn(PETSC_SUCCESS);
248: }

250: static PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
251: {
252:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
253:   PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
254:   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

256:   PetscFunctionBegin;
257:   for (i = 0; i < m; i++) {
258:     if (idxm[i] < 0) continue; /* negative row */
259:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
260:     if (idxm[i] >= rstart && idxm[i] < rend) {
261:       row = idxm[i] - rstart;
262:       for (j = 0; j < n; j++) {
263:         if (idxn[j] < 0) continue; /* negative column */
264:         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
265:         if (idxn[j] >= cstart && idxn[j] < cend) {
266:           col = idxn[j] - cstart;
267:           PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j));
268:         } else {
269:           if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
270: #if defined(PETSC_USE_CTABLE)
271:           PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col));
272:           col--;
273: #else
274:           col = sell->colmap[idxn[j]] - 1;
275: #endif
276:           if ((col < 0) || (sell->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
277:           else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j));
278:         }
279:       }
280:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
281:   }
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: static PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode)
286: {
287:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
288:   PetscInt     nstash, reallocs;

290:   PetscFunctionBegin;
291:   if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

293:   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
294:   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
295:   PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
296:   PetscFunctionReturn(PETSC_SUCCESS);
297: }

299: PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode)
300: {
301:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
302:   PetscMPIInt  n;
303:   PetscInt     i, flg;
304:   PetscInt    *row, *col;
305:   PetscScalar *val;
306:   PetscBool    other_disassembled;
307:   /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */
308:   PetscFunctionBegin;
309:   if (!sell->donotstash && !mat->nooffprocentries) {
310:     while (1) {
311:       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
312:       if (!flg) break;

314:       for (i = 0; i < n; i++) { /* assemble one by one */
315:         PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode));
316:       }
317:     }
318:     PetscCall(MatStashScatterEnd_Private(&mat->stash));
319:   }
320: #if defined(PETSC_HAVE_CUDA)
321:   if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU;
322: #endif
323:   PetscCall(MatAssemblyBegin(sell->A, mode));
324:   PetscCall(MatAssemblyEnd(sell->A, mode));

 326:   /*
 327:      Determine if any processor has disassembled; if so, we must
 328:      also disassemble ourselves so that we can reassemble.
 329:   */
 330:   /*
 331:      If the nonzero structure of submatrix B cannot change, then no
 332:      processor disassembled, and we can skip this step.
 333:   */
334:   if (!((Mat_SeqSELL *)sell->B->data)->nonew) {
335:     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
336:     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISELL(mat));
337:   }
338:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat));
339: #if defined(PETSC_HAVE_CUDA)
340:   if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU;
341: #endif
342:   PetscCall(MatAssemblyBegin(sell->B, mode));
343:   PetscCall(MatAssemblyEnd(sell->B, mode));
344:   PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
345:   sell->rowvalues = NULL;
346:   PetscCall(VecDestroy(&sell->diag));

 348:   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
349:   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)sell->A->data)->nonew) {
350:     PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate;
351:     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
352:   }
353: #if defined(PETSC_HAVE_CUDA)
354:   mat->offloadmask = PETSC_OFFLOAD_BOTH;
355: #endif
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: static PetscErrorCode MatZeroEntries_MPISELL(Mat A)
360: {
361:   Mat_MPISELL *l = (Mat_MPISELL *)A->data;

363:   PetscFunctionBegin;
364:   PetscCall(MatZeroEntries(l->A));
365:   PetscCall(MatZeroEntries(l->B));
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }

369: static PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy)
370: {
371:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
372:   PetscInt     nt;

374:   PetscFunctionBegin;
375:   PetscCall(VecGetLocalSize(xx, &nt));
376:   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
377:   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
378:   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
379:   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
380:   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }

384: static PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx)
385: {
386:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

388:   PetscFunctionBegin;
389:   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }

393: static PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
394: {
395:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

397:   PetscFunctionBegin;
398:   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
399:   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
400:   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
401:   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
402:   PetscFunctionReturn(PETSC_SUCCESS);
403: }

405: static PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy)
406: {
407:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

409:   PetscFunctionBegin;
410:   /* do nondiagonal part */
411:   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
412:   /* do local part */
413:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
414:   /* add partial results together */
415:   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
416:   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
417:   PetscFunctionReturn(PETSC_SUCCESS);
418: }

420: static PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
421: {
422:   MPI_Comm     comm;
423:   Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell;
424:   Mat          Adia  = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
425:   IS           Me, Notme;
426:   PetscInt     M, N, first, last, *notme, i;
427:   PetscMPIInt  size;

429:   PetscFunctionBegin;
430:   /* Easy test: symmetric diagonal block */
431:   Bsell = (Mat_MPISELL *)Bmat->data;
432:   Bdia  = Bsell->A;
433:   PetscCall(MatIsTranspose(Adia, Bdia, tol, f));
434:   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
435:   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
436:   PetscCallMPI(MPI_Comm_size(comm, &size));
437:   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

439:   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
440:   PetscCall(MatGetSize(Amat, &M, &N));
441:   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
442:   PetscCall(PetscMalloc1(N - last + first, &notme));
443:   for (i = 0; i < first; i++) notme[i] = i;
444:   for (i = last; i < M; i++) notme[i - last + first] = i;
445:   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
446:   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
447:   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
448:   Aoff = Aoffs[0];
449:   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
450:   Boff = Boffs[0];
451:   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
452:   PetscCall(MatDestroyMatrices(1, &Aoffs));
453:   PetscCall(MatDestroyMatrices(1, &Boffs));
454:   PetscCall(ISDestroy(&Me));
455:   PetscCall(ISDestroy(&Notme));
456:   PetscCall(PetscFree(notme));
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: static PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
461: {
462:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

464:   PetscFunctionBegin;
465:   /* do nondiagonal part */
466:   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
467:   /* do local part */
468:   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
469:   /* add partial results together */
470:   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
471:   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
472:   PetscFunctionReturn(PETSC_SUCCESS);
473: }

475: /*
476:   This only works correctly for square matrices where the subblock A->A is the
477:    diagonal block
478: */
479: static PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v)
480: {
481:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

483:   PetscFunctionBegin;
484:   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
485:   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
486:   PetscCall(MatGetDiagonal(a->A, v));
487:   PetscFunctionReturn(PETSC_SUCCESS);
488: }

490: static PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa)
491: {
492:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

494:   PetscFunctionBegin;
495:   PetscCall(MatScale(a->A, aa));
496:   PetscCall(MatScale(a->B, aa));
497:   PetscFunctionReturn(PETSC_SUCCESS);
498: }

500: PetscErrorCode MatDestroy_MPISELL(Mat mat)
501: {
502:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

504:   PetscFunctionBegin;
505:   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
506:   PetscCall(MatStashDestroy_Private(&mat->stash));
507:   PetscCall(VecDestroy(&sell->diag));
508:   PetscCall(MatDestroy(&sell->A));
509:   PetscCall(MatDestroy(&sell->B));
510: #if defined(PETSC_USE_CTABLE)
511:   PetscCall(PetscHMapIDestroy(&sell->colmap));
512: #else
513:   PetscCall(PetscFree(sell->colmap));
514: #endif
515:   PetscCall(PetscFree(sell->garray));
516:   PetscCall(VecDestroy(&sell->lvec));
517:   PetscCall(VecScatterDestroy(&sell->Mvctx));
518:   PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
519:   PetscCall(PetscFree(sell->ld));
520:   PetscCall(PetscFree(mat->data));

522:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
523:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
524:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
525:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
526:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL));
527:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL));
528: #if defined(PETSC_HAVE_CUDA)
529:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL));
530: #endif
531:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
532:   PetscFunctionReturn(PETSC_SUCCESS);
533: }

535: #include <petscdraw.h>
536: static PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
537: {
538:   Mat_MPISELL      *sell = (Mat_MPISELL *)mat->data;
539:   PetscMPIInt       rank = sell->rank, size = sell->size;
540:   PetscBool         isdraw, iascii, isbinary;
541:   PetscViewer       sviewer;
542:   PetscViewerFormat format;

544:   PetscFunctionBegin;
545:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
546:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
547:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
548:   if (iascii) {
549:     PetscCall(PetscViewerGetFormat(viewer, &format));
550:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
551:       MatInfo   info;
552:       PetscInt *inodes;

554:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
555:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
556:       PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL));
557:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
558:       if (!inodes) {
559:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
560:                                                      (PetscInt)info.nz_allocated, (PetscInt)info.memory));
561:       } else {
562:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
563:                                                      (PetscInt)info.nz_allocated, (PetscInt)info.memory));
564:       }
565:       PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info));
566:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
567:       PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info));
568:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
569:       PetscCall(PetscViewerFlush(viewer));
570:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
571:       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
572:       PetscCall(VecScatterView(sell->Mvctx, viewer));
573:       PetscFunctionReturn(PETSC_SUCCESS);
574:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
575:       PetscInt inodecount, inodelimit, *inodes;
576:       PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit));
577:       if (inodes) {
578:         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
579:       } else {
580:         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
581:       }
582:       PetscFunctionReturn(PETSC_SUCCESS);
583:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
584:       PetscFunctionReturn(PETSC_SUCCESS);
585:     }
586:   } else if (isbinary) {
587:     if (size == 1) {
588:       PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name));
589:       PetscCall(MatView(sell->A, viewer));
590:     } else {
591:       /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */
592:     }
593:     PetscFunctionReturn(PETSC_SUCCESS);
594:   } else if (isdraw) {
595:     PetscDraw draw;
596:     PetscBool isnull;
597:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
598:     PetscCall(PetscDrawIsNull(draw, &isnull));
599:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
600:   }

602:   {
 603:     /* assemble the entire matrix onto the first process. */
604:     Mat          A;
605:     Mat_SeqSELL *Aloc;
606:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j;
607:     MatScalar   *aval;
608:     PetscBool    isnonzero;

610:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
611:     if (rank == 0) {
612:       PetscCall(MatSetSizes(A, M, N, M, N));
613:     } else {
614:       PetscCall(MatSetSizes(A, 0, 0, M, N));
615:     }
616:     /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */
617:     PetscCall(MatSetType(A, MATMPISELL));
618:     PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL));
619:     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

621:     /* copy over the A part */
622:     Aloc    = (Mat_SeqSELL *)sell->A->data;
623:     acolidx = Aloc->colidx;
624:     aval    = Aloc->val;
625:     for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */
626:       for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
627:         isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
628:         if (isnonzero) { /* check the mask bit */
629:           row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
630:           col = *acolidx + mat->rmap->rstart;
631:           PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
632:         }
633:         aval++;
634:         acolidx++;
635:       }
636:     }

638:     /* copy over the B part */
639:     Aloc    = (Mat_SeqSELL *)sell->B->data;
640:     acolidx = Aloc->colidx;
641:     aval    = Aloc->val;
642:     for (i = 0; i < Aloc->totalslices; i++) {
643:       for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
644:         isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
645:         if (isnonzero) {
646:           row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
647:           col = sell->garray[*acolidx];
648:           PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
649:         }
650:         aval++;
651:         acolidx++;
652:       }
653:     }

655:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
656:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
657:     /*
658:        Everyone has to call to draw the matrix since the graphics waits are
659:        synchronized across all processors that share the PetscDraw object
660:     */
661:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
662:     if (rank == 0) {
663:       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)A->data)->A, ((PetscObject)mat)->name));
664:       PetscCall(MatView_SeqSELL(((Mat_MPISELL *)A->data)->A, sviewer));
665:     }
666:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
667:     PetscCall(MatDestroy(&A));
668:   }
669:   PetscFunctionReturn(PETSC_SUCCESS);
670: }

672: static PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer)
673: {
674:   PetscBool iascii, isdraw, issocket, isbinary;

676:   PetscFunctionBegin;
677:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
678:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
679:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
680:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
681:   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer));
682:   PetscFunctionReturn(PETSC_SUCCESS);
683: }

685: static PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
686: {
687:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

689:   PetscFunctionBegin;
690:   PetscCall(MatGetSize(sell->B, NULL, nghosts));
691:   if (ghosts) *ghosts = sell->garray;
692:   PetscFunctionReturn(PETSC_SUCCESS);
693: }

695: static PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info)
696: {
697:   Mat_MPISELL   *mat = (Mat_MPISELL *)matin->data;
698:   Mat            A = mat->A, B = mat->B;
699:   PetscLogDouble isend[5], irecv[5];

701:   PetscFunctionBegin;
702:   info->block_size = 1.0;
703:   PetscCall(MatGetInfo(A, MAT_LOCAL, info));

705:   isend[0] = info->nz_used;
706:   isend[1] = info->nz_allocated;
707:   isend[2] = info->nz_unneeded;
708:   isend[3] = info->memory;
709:   isend[4] = info->mallocs;

711:   PetscCall(MatGetInfo(B, MAT_LOCAL, info));

713:   isend[0] += info->nz_used;
714:   isend[1] += info->nz_allocated;
715:   isend[2] += info->nz_unneeded;
716:   isend[3] += info->memory;
717:   isend[4] += info->mallocs;
718:   if (flag == MAT_LOCAL) {
719:     info->nz_used      = isend[0];
720:     info->nz_allocated = isend[1];
721:     info->nz_unneeded  = isend[2];
722:     info->memory       = isend[3];
723:     info->mallocs      = isend[4];
724:   } else if (flag == MAT_GLOBAL_MAX) {
725:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

727:     info->nz_used      = irecv[0];
728:     info->nz_allocated = irecv[1];
729:     info->nz_unneeded  = irecv[2];
730:     info->memory       = irecv[3];
731:     info->mallocs      = irecv[4];
732:   } else if (flag == MAT_GLOBAL_SUM) {
733:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

735:     info->nz_used      = irecv[0];
736:     info->nz_allocated = irecv[1];
737:     info->nz_unneeded  = irecv[2];
738:     info->memory       = irecv[3];
739:     info->mallocs      = irecv[4];
740:   }
741:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
742:   info->fill_ratio_needed = 0;
743:   info->factor_mallocs    = 0;
744:   PetscFunctionReturn(PETSC_SUCCESS);
745: }

747: static PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg)
748: {
749:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

751:   PetscFunctionBegin;
752:   switch (op) {
753:   case MAT_NEW_NONZERO_LOCATIONS:
754:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
755:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
756:   case MAT_KEEP_NONZERO_PATTERN:
757:   case MAT_NEW_NONZERO_LOCATION_ERR:
758:   case MAT_USE_INODES:
759:   case MAT_IGNORE_ZERO_ENTRIES:
760:     MatCheckPreallocated(A, 1);
761:     PetscCall(MatSetOption(a->A, op, flg));
762:     PetscCall(MatSetOption(a->B, op, flg));
763:     break;
764:   case MAT_ROW_ORIENTED:
765:     MatCheckPreallocated(A, 1);
766:     a->roworiented = flg;

768:     PetscCall(MatSetOption(a->A, op, flg));
769:     PetscCall(MatSetOption(a->B, op, flg));
770:     break;
771:   case MAT_FORCE_DIAGONAL_ENTRIES:
772:   case MAT_SORTED_FULL:
773:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
774:     break;
775:   case MAT_IGNORE_OFF_PROC_ENTRIES:
776:     a->donotstash = flg;
777:     break;
778:   case MAT_SPD:
779:   case MAT_SPD_ETERNAL:
780:     break;
781:   case MAT_SYMMETRIC:
782:     MatCheckPreallocated(A, 1);
783:     PetscCall(MatSetOption(a->A, op, flg));
784:     break;
785:   case MAT_STRUCTURALLY_SYMMETRIC:
786:     MatCheckPreallocated(A, 1);
787:     PetscCall(MatSetOption(a->A, op, flg));
788:     break;
789:   case MAT_HERMITIAN:
790:     MatCheckPreallocated(A, 1);
791:     PetscCall(MatSetOption(a->A, op, flg));
792:     break;
793:   case MAT_SYMMETRY_ETERNAL:
794:     MatCheckPreallocated(A, 1);
795:     PetscCall(MatSetOption(a->A, op, flg));
796:     break;
797:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
798:     MatCheckPreallocated(A, 1);
799:     PetscCall(MatSetOption(a->A, op, flg));
800:     break;
801:   default:
802:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
803:   }
804:   PetscFunctionReturn(PETSC_SUCCESS);
805: }

807: static PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr)
808: {
809:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
810:   Mat          a = sell->A, b = sell->B;
811:   PetscInt     s1, s2, s3;

813:   PetscFunctionBegin;
814:   PetscCall(MatGetLocalSize(mat, &s2, &s3));
815:   if (rr) {
816:     PetscCall(VecGetLocalSize(rr, &s1));
817:     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
818:     /* Overlap communication with computation. */
819:     PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
820:   }
821:   if (ll) {
822:     PetscCall(VecGetLocalSize(ll, &s1));
823:     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
824:     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
825:   }
 826:   /* scale the diagonal block */
827:   PetscUseTypeMethod(a, diagonalscale, ll, rr);

829:   if (rr) {
830:     /* Do a scatter end and then right scale the off-diagonal block */
831:     PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
832:     PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec);
833:   }
834:   PetscFunctionReturn(PETSC_SUCCESS);
835: }

837: static PetscErrorCode MatSetUnfactored_MPISELL(Mat A)
838: {
839:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

841:   PetscFunctionBegin;
842:   PetscCall(MatSetUnfactored(a->A));
843:   PetscFunctionReturn(PETSC_SUCCESS);
844: }

846: static PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag)
847: {
848:   Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data;
849:   Mat          a, b, c, d;
850:   PetscBool    flg;

852:   PetscFunctionBegin;
853:   a = matA->A;
854:   b = matA->B;
855:   c = matB->A;
856:   d = matB->B;

858:   PetscCall(MatEqual(a, c, &flg));
859:   if (flg) PetscCall(MatEqual(b, d, &flg));
860:   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
861:   PetscFunctionReturn(PETSC_SUCCESS);
862: }

864: static PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str)
865: {
866:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
867:   Mat_MPISELL *b = (Mat_MPISELL *)B->data;

869:   PetscFunctionBegin;
870:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
871:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
 872:     /* Because of the column compression in the off-process part of the matrix a->B,
 873:        the number of columns in a->B and b->B may differ, hence we cannot call
 874:        MatCopy() directly on the two parts. If need be, a copy more efficient than
 875:        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
 876:        then copying the submatrices */
877:     PetscCall(MatCopy_Basic(A, B, str));
878:   } else {
879:     PetscCall(MatCopy(a->A, b->A, str));
880:     PetscCall(MatCopy(a->B, b->B, str));
881:   }
882:   PetscFunctionReturn(PETSC_SUCCESS);
883: }

885: static PetscErrorCode MatSetUp_MPISELL(Mat A)
886: {
887:   PetscFunctionBegin;
888:   PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
889:   PetscFunctionReturn(PETSC_SUCCESS);
890: }

892: static PetscErrorCode MatConjugate_MPISELL(Mat mat)
893: {
894:   PetscFunctionBegin;
895:   if (PetscDefined(USE_COMPLEX)) {
896:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

898:     PetscCall(MatConjugate_SeqSELL(sell->A));
899:     PetscCall(MatConjugate_SeqSELL(sell->B));
900:   }
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: static PetscErrorCode MatRealPart_MPISELL(Mat A)
905: {
906:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

908:   PetscFunctionBegin;
909:   PetscCall(MatRealPart(a->A));
910:   PetscCall(MatRealPart(a->B));
911:   PetscFunctionReturn(PETSC_SUCCESS);
912: }

914: static PetscErrorCode MatImaginaryPart_MPISELL(Mat A)
915: {
916:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

918:   PetscFunctionBegin;
919:   PetscCall(MatImaginaryPart(a->A));
920:   PetscCall(MatImaginaryPart(a->B));
921:   PetscFunctionReturn(PETSC_SUCCESS);
922: }

924: static PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values)
925: {
926:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

928:   PetscFunctionBegin;
929:   PetscCall(MatInvertBlockDiagonal(a->A, values));
930:   A->factorerrortype = a->A->factorerrortype;
931:   PetscFunctionReturn(PETSC_SUCCESS);
932: }

934: static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx)
935: {
936:   Mat_MPISELL *sell = (Mat_MPISELL *)x->data;

938:   PetscFunctionBegin;
939:   PetscCall(MatSetRandom(sell->A, rctx));
940:   PetscCall(MatSetRandom(sell->B, rctx));
941:   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
942:   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
943:   PetscFunctionReturn(PETSC_SUCCESS);
944: }

946: static PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems *PetscOptionsObject)
947: {
948:   PetscFunctionBegin;
949:   PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options");
950:   PetscOptionsHeadEnd();
951:   PetscFunctionReturn(PETSC_SUCCESS);
952: }

954: static PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a)
955: {
956:   Mat_MPISELL *msell = (Mat_MPISELL *)Y->data;
957:   Mat_SeqSELL *sell  = (Mat_SeqSELL *)msell->A->data;

959:   PetscFunctionBegin;
960:   if (!Y->preallocated) {
961:     PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL));
962:   } else if (!sell->nz) {
963:     PetscInt nonew = sell->nonew;
964:     PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL));
965:     sell->nonew = nonew;
966:   }
967:   PetscCall(MatShift_Basic(Y, a));
968:   PetscFunctionReturn(PETSC_SUCCESS);
969: }

971: static PetscErrorCode MatMissingDiagonal_MPISELL(Mat A, PetscBool *missing, PetscInt *d)
972: {
973:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;

975:   PetscFunctionBegin;
976:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
977:   PetscCall(MatMissingDiagonal(a->A, missing, d));
978:   if (d) {
979:     PetscInt rstart;
980:     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
981:     *d += rstart;
982:   }
983:   PetscFunctionReturn(PETSC_SUCCESS);
984: }

986: static PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a)
987: {
988:   PetscFunctionBegin;
989:   *a = ((Mat_MPISELL *)A->data)->A;
990:   PetscFunctionReturn(PETSC_SUCCESS);
991: }

993: static PetscErrorCode MatStoreValues_MPISELL(Mat mat)
994: {
995:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

997:   PetscFunctionBegin;
998:   PetscCall(MatStoreValues(sell->A));
999:   PetscCall(MatStoreValues(sell->B));
1000:   PetscFunctionReturn(PETSC_SUCCESS);
1001: }

1003: static PetscErrorCode MatRetrieveValues_MPISELL(Mat mat)
1004: {
1005:   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

1007:   PetscFunctionBegin;
1008:   PetscCall(MatRetrieveValues(sell->A));
1009:   PetscCall(MatRetrieveValues(sell->B));
1010:   PetscFunctionReturn(PETSC_SUCCESS);
1011: }

1013: static PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
1014: {
1015:   Mat_MPISELL *b;

1017:   PetscFunctionBegin;
1018:   PetscCall(PetscLayoutSetUp(B->rmap));
1019:   PetscCall(PetscLayoutSetUp(B->cmap));
1020:   b = (Mat_MPISELL *)B->data;

1022:   if (!B->preallocated) {
1023:     /* Explicitly create 2 MATSEQSELL matrices. */
1024:     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1025:     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1026:     PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
1027:     PetscCall(MatSetType(b->A, MATSEQSELL));
1028:     PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1029:     PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
1030:     PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
1031:     PetscCall(MatSetType(b->B, MATSEQSELL));
1032:   }

1034:   PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
1035:   PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
1036:   B->preallocated  = PETSC_TRUE;
1037:   B->was_assembled = PETSC_FALSE;
1038:   /*
1039:     This is critical for MatAssemblyEnd() to work:
1040:     MatAssemblyBegin() checks it to set up was_assembled,
1041:     and MatAssemblyEnd() checks was_assembled to determine whether to build garray
1042:   */
1043:   B->assembled = PETSC_FALSE;
1044:   PetscFunctionReturn(PETSC_SUCCESS);
1045: }

1047: static PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
1048: {
1049:   Mat          mat;
1050:   Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data;

1052:   PetscFunctionBegin;
1053:   *newmat = NULL;
1054:   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
1055:   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
1056:   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
1057:   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
1058:   a = (Mat_MPISELL *)mat->data;

1060:   mat->factortype   = matin->factortype;
1061:   mat->assembled    = PETSC_TRUE;
1062:   mat->insertmode   = NOT_SET_VALUES;
1063:   mat->preallocated = PETSC_TRUE;

1065:   a->size         = oldmat->size;
1066:   a->rank         = oldmat->rank;
1067:   a->donotstash   = oldmat->donotstash;
1068:   a->roworiented  = oldmat->roworiented;
1069:   a->rowindices   = NULL;
1070:   a->rowvalues    = NULL;
1071:   a->getrowactive = PETSC_FALSE;

1073:   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
1074:   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

1076:   if (oldmat->colmap) {
1077: #if defined(PETSC_USE_CTABLE)
1078:     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
1079: #else
1080:     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
1081:     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
1082: #endif
1083:   } else a->colmap = NULL;
1084:   if (oldmat->garray) {
1085:     PetscInt len;
1086:     len = oldmat->B->cmap->n;
1087:     PetscCall(PetscMalloc1(len + 1, &a->garray));
1088:     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
1089:   } else a->garray = NULL;

1091:   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
1092:   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
1093:   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1094:   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
1095:   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
1096:   *newmat = mat;
1097:   PetscFunctionReturn(PETSC_SUCCESS);
1098: }

1100: static const struct _MatOps MatOps_Values = {MatSetValues_MPISELL,
1101:                                              NULL,
1102:                                              NULL,
1103:                                              MatMult_MPISELL,
1104:                                              /* 4*/ MatMultAdd_MPISELL,
1105:                                              MatMultTranspose_MPISELL,
1106:                                              MatMultTransposeAdd_MPISELL,
1107:                                              NULL,
1108:                                              NULL,
1109:                                              NULL,
1110:                                              /*10*/ NULL,
1111:                                              NULL,
1112:                                              NULL,
1113:                                              MatSOR_MPISELL,
1114:                                              NULL,
1115:                                              /*15*/ MatGetInfo_MPISELL,
1116:                                              MatEqual_MPISELL,
1117:                                              MatGetDiagonal_MPISELL,
1118:                                              MatDiagonalScale_MPISELL,
1119:                                              NULL,
1120:                                              /*20*/ MatAssemblyBegin_MPISELL,
1121:                                              MatAssemblyEnd_MPISELL,
1122:                                              MatSetOption_MPISELL,
1123:                                              MatZeroEntries_MPISELL,
1124:                                              /*24*/ NULL,
1125:                                              NULL,
1126:                                              NULL,
1127:                                              NULL,
1128:                                              NULL,
1129:                                              /*29*/ MatSetUp_MPISELL,
1130:                                              NULL,
1131:                                              NULL,
1132:                                              MatGetDiagonalBlock_MPISELL,
1133:                                              NULL,
1134:                                              /*34*/ MatDuplicate_MPISELL,
1135:                                              NULL,
1136:                                              NULL,
1137:                                              NULL,
1138:                                              NULL,
1139:                                              /*39*/ NULL,
1140:                                              NULL,
1141:                                              NULL,
1142:                                              MatGetValues_MPISELL,
1143:                                              MatCopy_MPISELL,
1144:                                              /*44*/ NULL,
1145:                                              MatScale_MPISELL,
1146:                                              MatShift_MPISELL,
1147:                                              MatDiagonalSet_MPISELL,
1148:                                              NULL,
1149:                                              /*49*/ MatSetRandom_MPISELL,
1150:                                              NULL,
1151:                                              NULL,
1152:                                              NULL,
1153:                                              NULL,
1154:                                              /*54*/ MatFDColoringCreate_MPIXAIJ,
1155:                                              NULL,
1156:                                              MatSetUnfactored_MPISELL,
1157:                                              NULL,
1158:                                              NULL,
1159:                                              /*59*/ NULL,
1160:                                              MatDestroy_MPISELL,
1161:                                              MatView_MPISELL,
1162:                                              NULL,
1163:                                              NULL,
1164:                                              /*64*/ NULL,
1165:                                              NULL,
1166:                                              NULL,
1167:                                              NULL,
1168:                                              NULL,
1169:                                              /*69*/ NULL,
1170:                                              NULL,
1171:                                              NULL,
1172:                                              NULL,
1173:                                              NULL,
1174:                                              NULL,
1175:                                              /*75*/ MatFDColoringApply_AIJ, /* reuse AIJ function */
1176:                                              MatSetFromOptions_MPISELL,
1177:                                              NULL,
1178:                                              NULL,
1179:                                              NULL,
1180:                                              /*80*/ NULL,
1181:                                              NULL,
1182:                                              NULL,
1183:                                              /*83*/ NULL,
1184:                                              NULL,
1185:                                              NULL,
1186:                                              NULL,
1187:                                              NULL,
1188:                                              NULL,
1189:                                              /*89*/ NULL,
1190:                                              NULL,
1191:                                              NULL,
1192:                                              NULL,
1193:                                              NULL,
1194:                                              /*94*/ NULL,
1195:                                              NULL,
1196:                                              NULL,
1197:                                              NULL,
1198:                                              NULL,
1199:                                              /*99*/ NULL,
1200:                                              NULL,
1201:                                              NULL,
1202:                                              MatConjugate_MPISELL,
1203:                                              NULL,
1204:                                              /*104*/ NULL,
1205:                                              MatRealPart_MPISELL,
1206:                                              MatImaginaryPart_MPISELL,
1207:                                              NULL,
1208:                                              NULL,
1209:                                              /*109*/ NULL,
1210:                                              NULL,
1211:                                              NULL,
1212:                                              NULL,
1213:                                              MatMissingDiagonal_MPISELL,
1214:                                              /*114*/ NULL,
1215:                                              NULL,
1216:                                              MatGetGhosts_MPISELL,
1217:                                              NULL,
1218:                                              NULL,
1219:                                              /*119*/ MatMultDiagonalBlock_MPISELL,
1220:                                              NULL,
1221:                                              NULL,
1222:                                              NULL,
1223:                                              NULL,
1224:                                              /*124*/ NULL,
1225:                                              NULL,
1226:                                              MatInvertBlockDiagonal_MPISELL,
1227:                                              NULL,
1228:                                              NULL,
1229:                                              /*129*/ NULL,
1230:                                              NULL,
1231:                                              NULL,
1232:                                              NULL,
1233:                                              NULL,
1234:                                              /*134*/ NULL,
1235:                                              NULL,
1236:                                              NULL,
1237:                                              NULL,
1238:                                              NULL,
1239:                                              /*139*/ NULL,
1240:                                              NULL,
1241:                                              NULL,
1242:                                              MatFDColoringSetUp_MPIXAIJ,
1243:                                              NULL,
1244:                                              /*144*/ NULL,
1245:                                              NULL,
1246:                                              NULL,
1247:                                              NULL,
1248:                                              NULL,
1249:                                              NULL,
1250:                                              /*150*/ NULL,
1251:                                              NULL,
1252:                                              NULL};

1254: /*@C
1255:   MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format.
1256:   For good matrix assembly performance the user should preallocate the matrix storage by
1257:   setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

1259:   Collective

1261:   Input Parameters:
1262: + B     - the matrix
1263: . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1264:            (same value is used for all local rows)
1265: . d_nnz - array containing the number of nonzeros in the various rows of the
1266:            DIAGONAL portion of the local submatrix (possibly different for each row)
1267:            or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
1268:            The size of this array is equal to the number of local rows, i.e., 'm'.
1269:            For matrices that will be factored, you must leave room for (and set)
1270:            the diagonal entry even if it is zero.
1271: . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1272:            submatrix (same value is used for all local rows).
1273: - o_nnz - array containing the number of nonzeros in the various rows of the
1274:            OFF-DIAGONAL portion of the local submatrix (possibly different for
1275:            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
1276:            structure. The size of this array is equal to the number
1277:            of local rows, i.e., 'm'.

1279:   Example usage:
1280:   Consider the following 8x8 matrix with 34 non-zero values that is
1281:   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1282:   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1283:   as follows

1285: .vb
1286:             1  2  0  |  0  3  0  |  0  4
1287:     Proc0   0  5  6  |  7  0  0  |  8  0
1288:             9  0 10  | 11  0  0  | 12  0
1289:     -------------------------------------
1290:            13  0 14  | 15 16 17  |  0  0
1291:     Proc1   0 18  0  | 19 20 21  |  0  0
1292:             0  0  0  | 22 23  0  | 24  0
1293:     -------------------------------------
1294:     Proc2  25 26 27  |  0  0 28  | 29  0
1295:            30  0  0  | 31 32 33  |  0 34
1296: .ve

1298:   This can be represented as a collection of submatrices as

1300: .vb
1301:       A B C
1302:       D E F
1303:       G H I
1304: .ve

1306:   Where the submatrices A,B,C are owned by proc0, D,E,F are
1307:   owned by proc1, G,H,I are owned by proc2.

1309:   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1310:   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1311:   The 'M','N' parameters are 8,8, and have the same values on all procs.

1313:   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1314:   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1315:   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1316:   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1317:   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1318:   matrix, and [DF] as another `MATSEQSELL` matrix.

1320:   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
1321:   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
1322:   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1323:   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros
1324:   over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
1325:   In this case, the values of `d_nz`, `o_nz` are
1326: .vb
1327:      proc0  d_nz = 2, o_nz = 2
1328:      proc1  d_nz = 3, o_nz = 2
1329:      proc2  d_nz = 1, o_nz = 4
1330: .ve
1331:   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1332:   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1333:   for proc2, i.e., we are using 12+15+10=37 storage locations to store
1334:   34 values.

1336:   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
1337:   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1338:   In the above case the values for `d_nnz`, `o_nnz` are
1339: .vb
1340:      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
1341:      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
1342:      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
1343: .ve
1344:   Here the space allocated is according to nz (or the maximum value in nnz,
1345:   if nnz is provided) for the DIAGONAL and OFF-DIAGONAL submatrices, i.e. (2+2+3+2)*3+(1+4)*2=37

1347:   Level: intermediate

1349:   Notes:
1350:   If the *_nnz parameter is given then the *_nz parameter is ignored

1352:   The stored row and column indices begin with zero.

1354:   The parallel matrix is partitioned such that the first m0 rows belong to
1355:   process 0, the next m1 rows belong to process 1, the next m2 rows belong
1356:   to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.

1358:   The DIAGONAL portion of the local submatrix of a processor can be defined
1359:   as the submatrix which is obtained by extracting the part corresponding to
1360:   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
1361:   first row that belongs to the processor, r2 is the last row belonging to
1362:   this processor, and c1-c2 is the range of indices of the local part of a
1363:   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
1364:   common case of a square matrix, the row and column ranges are the same and
1365:   the DIAGONAL part is also square. The remaining portion of the local
1366:   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

1368:   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

1370:   You can call `MatGetInfo()` to get information on how effective the preallocation was;
1371:   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1372:   You can also run with the option -info and look for messages containing the string
1373:   malloc to see if additional memory allocation was needed.
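
  As a concrete sketch (values taken from the example above; the variable names are
  illustrative and error checking is omitted), the rank playing the role of proc1
  could preallocate its three local rows as
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPISELL);
   MatMPISELLSetPreallocation(A,0,d_nnz,0,o_nnz); /* d_nz and o_nz are ignored since the arrays are given */
   /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve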

1375: .seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`,
1376:           `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL`
1377: @*/
1378: PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
1379: {
1380:   PetscFunctionBegin;
1383:   PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
1384:   PetscFunctionReturn(PETSC_SUCCESS);
1385: }

1387: /*MC
1388:    MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices,
1389:    based on the sliced Ellpack format

1391:    Options Database Key:
1392: . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`

1394:    Level: beginner

1396: .seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
1397: M*/

1399: /*@C
1400:   MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format.

1402:   Collective

1404:   Input Parameters:
1405: + comm      - MPI communicator
1406: . m         - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1407:            This value should be the same as the local size used in creating the
1408:            y vector for the matrix-vector product y = Ax.
1409: . n         - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1410:        This value should be the same as the local size used in creating the
1411:        x vector for the matrix-vector product y = Ax. For square matrices `n` is almost always `m`.
1412: . M         - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1413: . N         - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1414: . d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix
1415:                (same value is used for all local rows)
1416: . d_rlen    - array containing the number of nonzeros in the various rows of the
1417:             DIAGONAL portion of the local submatrix (possibly different for each row)
1418:             or `NULL`, if d_rlenmax is used to specify the nonzero structure.
1419:             The size of this array is equal to the number of local rows, i.e., `m`.
1420: . o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local
1421:                submatrix (same value is used for all local rows).
1422: - o_rlen    - array containing the number of nonzeros in the various rows of the
1423:             OFF-DIAGONAL portion of the local submatrix (possibly different for
1424:             each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero
1425:             structure. The size of this array is equal to the number
1426:             of local rows, i.e., `m`.

1428:   Output Parameter:
1429: . A - the matrix

1431:   Options Database Key:
1432: . -mat_sell_oneindex - Internally use indexing starting at 1
1433:         rather than 0.  When calling `MatSetValues()`,
1434:         the user still MUST index entries starting at 0!

1436:   Example:
1437:   Consider the following 8x8 matrix with 34 non-zero values that is
1438:   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1439:   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1440:   as follows

1442: .vb
1443:             1  2  0  |  0  3  0  |  0  4
1444:     Proc0   0  5  6  |  7  0  0  |  8  0
1445:             9  0 10  | 11  0  0  | 12  0
1446:     -------------------------------------
1447:            13  0 14  | 15 16 17  |  0  0
1448:     Proc1   0 18  0  | 19 20 21  |  0  0
1449:             0  0  0  | 22 23  0  | 24  0
1450:     -------------------------------------
1451:     Proc2  25 26 27  |  0  0 28  | 29  0
1452:            30  0  0  | 31 32 33  |  0 34
1453: .ve

1455:   This can be represented as a collection of submatrices as
1456: .vb
1457:       A B C
1458:       D E F
1459:       G H I
1460: .ve

1462:   Where the submatrices A,B,C are owned by proc0, D,E,F are
1463:   owned by proc1, G,H,I are owned by proc2.

1465:   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1466:   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1467:   The 'M','N' parameters are 8,8, and have the same values on all procs.

1469:   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1470:   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1471:   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1472:   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1473:   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1474:   matrix, and [DF] as another `MATSEQSELL` matrix.

1476:   When the `d_rlenmax`, `o_rlenmax` parameters are specified, `d_rlenmax` storage elements are
1477:   allocated for every row of the local DIAGONAL submatrix, and `o_rlenmax`
1478:   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1479:   One way to choose `d_rlenmax` and `o_rlenmax` is to use the maximum number of nonzeros
1480:   over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
1481:   In this case, the values of `d_rlenmax`, `o_rlenmax` are
1482: .vb
1483:      proc0 - d_rlenmax = 2, o_rlenmax = 2
1484:      proc1 - d_rlenmax = 3, o_rlenmax = 2
1485:      proc2 - d_rlenmax = 1, o_rlenmax = 4
1486: .ve
1487:   We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This
1488:   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1489:   for proc2, i.e., we are using 12+15+10=37 storage locations to store
1490:   34 values.

1492:   When `d_rlen`, `o_rlen` parameters are specified, the storage is specified
1493:   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1494:   In the above case the values for `d_rlen`, `o_rlen` are
1495: .vb
1496:      proc0 - d_rlen = [2,2,2] and o_rlen = [2,2,2]
1497:      proc1 - d_rlen = [3,3,2] and o_rlen = [2,1,1]
1498:      proc2 - d_rlen = [1,1]   and o_rlen = [4,4]
1499: .ve
1500:   Here the space allocated is still 37 though there are 34 nonzeros because
1501:   the allocation is always done according to rlenmax.

1503:   Level: intermediate

1505:   Notes:
1506:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1507:   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1508:   [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]

1510:   If the *_rlen parameter is given then the *_rlenmax parameter is ignored

1512:   `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
1513:   processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate
1514:   storage requirements for this matrix.

1516:   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
1517:   processor then it must be used on all processors that share the object for
1518:   that argument.

1520:   The user MUST specify either the local or global matrix dimensions
1521:   (possibly both).

1523:   The parallel matrix is partitioned across processors such that the
1524:   first m0 rows belong to process 0, the next m1 rows belong to
1525:   process 1, the next m2 rows belong to process 2, etc., where
1526:   m0,m1,m2,... are the values of the input parameter `m` on each process, i.e., each processor stores
1527:   values corresponding to an [`m` x `N`] submatrix.

1529:   The columns are logically partitioned with the n0 columns belonging
1530:   to 0th partition, the next n1 columns belonging to the next
1531:   partition etc.. where n0,n1,n2... are the input parameter `n`.

1533:   The DIAGONAL portion of the local submatrix on any given processor
1534:   is the submatrix corresponding to the `m` rows and `n` columns owned by
1535:   the given processor, i.e., the diagonal matrix on
1536:   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
1537:   etc. The remaining portion of the local submatrix [m x (N-n)]
1538:   constitutes the OFF-DIAGONAL portion. The example above
1539:   illustrates this concept.

1541:   For a square global matrix we define each processor's diagonal portion
1542:   to be its local rows and the corresponding columns (a square submatrix);
1543:   each processor's off-diagonal portion encompasses the remainder of the
1544:   local matrix (a rectangular submatrix).

1546:   If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored.

1548:   When calling this routine with a single process communicator, a matrix of
1549:   type `MATSEQSELL` is returned.  If a matrix of type `MATMPISELL` is desired for this
1550:   type of communicator, use the construction mechanism
1551: .vb
1552:    MatCreate(...,&A);
1553:    MatSetType(A,MATMPISELL);
1554:    MatSetSizes(A, m,n,M,N);
1555:    MatMPISELLSetPreallocation(A,...);
1556: .ve
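
  As a concrete multi-process sketch (values from the example above; variable names are
  illustrative and error checking is omitted), the rank playing the role of proc1 could
  create and preallocate its share in a single call with
.vb
   Mat      A;
   PetscInt d_rlen[3] = {3,3,2}, o_rlen[3] = {2,1,1};

   MatCreateSELL(PETSC_COMM_WORLD,3,3,8,8,0,d_rlen,0,o_rlen,&A);
.ve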

1558: .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MatMPISELLSetPreallocationSELL()`,
1559:           `MATMPISELL`, `MatCreateMPISELLWithArrays()`
1560: @*/
1561: PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A)
1562: {
1563:   PetscMPIInt size;

1565:   PetscFunctionBegin;
1566:   PetscCall(MatCreate(comm, A));
1567:   PetscCall(MatSetSizes(*A, m, n, M, N));
1568:   PetscCallMPI(MPI_Comm_size(comm, &size));
1569:   if (size > 1) {
1570:     PetscCall(MatSetType(*A, MATMPISELL));
1571:     PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen));
1572:   } else {
1573:     PetscCall(MatSetType(*A, MATSEQSELL));
1574:     PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen));
1575:   }
1576:   PetscFunctionReturn(PETSC_SUCCESS);
1577: }

1579: /*@C
1580:   MatMPISELLGetSeqSELL - Returns the local pieces of this distributed matrix

1582:   Not Collective

1584:   Input Parameter:
1585: . A - the `MATMPISELL` matrix

1587:   Output Parameters:
1588: + Ad     - The diagonal portion of `A`
1589: . Ao     - The off-diagonal portion of `A`
1590: - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
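
  Example usage (a sketch; the variable names are illustrative and error checking is omitted):
.vb
   Mat             Ad,Ao;
   const PetscInt *garray;
   PetscInt        j,nB;

   MatMPISELLGetSeqSELL(A,&Ad,&Ao,&garray);
   MatGetSize(Ao,NULL,&nB);   /* nB = number of columns of Ao = length of garray */
   for (j=0; j<nB; j++) PetscPrintf(PETSC_COMM_SELF,"local column %" PetscInt_FMT " of Ao is global column %" PetscInt_FMT "\n",j,garray[j]);
.ve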

1592:   Level: advanced

1594: .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`
1595: @*/
1596: PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
1597: {
1598:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1599:   PetscBool    flg;

1601:   PetscFunctionBegin;
1602:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg));
1603:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input");
1604:   if (Ad) *Ad = a->A;
1605:   if (Ao) *Ao = a->B;
1606:   if (colmap) *colmap = a->garray;
1607:   PetscFunctionReturn(PETSC_SUCCESS);
1608: }

1610: /*@C
1611:   MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from a `MATMPISELL` matrix by
1612:   taking all its local rows and NON-ZERO columns

1614:   Not Collective

1616:   Input Parameters:
1617: + A     - the matrix
1618: . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
1619: . row   - index set of rows to extract (or `NULL` for all local rows)
1620: - col   - index set of columns to extract (or `NULL` for all nonzero columns)

1622:   Output Parameter:
1623: . A_loc - the local sequential matrix generated
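
  Example usage (a sketch; error checking is omitted, and the generated matrix is
  destroyed by the caller when no longer needed):
.vb
   Mat A_loc;

   MatMPISELLGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
   /* ... use A_loc, which holds the local rows and only the nonzero columns ... */
   MatDestroy(&A_loc);
.ve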

1625:   Level: advanced

1627: .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()`
1628: @*/
1629: PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
1630: {
1631:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1632:   PetscInt     i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
1633:   IS           isrowa, iscola;
1634:   Mat         *aloc;
1635:   PetscBool    match;

1637:   PetscFunctionBegin;
1638:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match));
1639:   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input");
1640:   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
1641:   if (!row) {
1642:     start = A->rmap->rstart;
1643:     end   = A->rmap->rend;
1644:     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
1645:   } else {
1646:     isrowa = *row;
1647:   }
1648:   if (!col) {
1649:     start = A->cmap->rstart;
1650:     cmap  = a->garray;
1651:     nzA   = a->A->cmap->n;
1652:     nzB   = a->B->cmap->n;
1653:     PetscCall(PetscMalloc1(nzA + nzB, &idx));
1654:     ncols = 0;
1655:     for (i = 0; i < nzB; i++) {
1656:       if (cmap[i] < start) idx[ncols++] = cmap[i];
1657:       else break;
1658:     }
1659:     imark = i;
1660:     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
1661:     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
1662:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
1663:   } else {
1664:     iscola = *col;
1665:   }
1666:   if (scall != MAT_INITIAL_MATRIX) {
1667:     PetscCall(PetscMalloc1(1, &aloc));
1668:     aloc[0] = *A_loc;
1669:   }
1670:   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
1671:   *A_loc = aloc[0];
1672:   PetscCall(PetscFree(aloc));
1673:   if (!row) PetscCall(ISDestroy(&isrowa));
1674:   if (!col) PetscCall(ISDestroy(&iscola));
1675:   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
1676:   PetscFunctionReturn(PETSC_SUCCESS);
1677: }

1679: #include <../src/mat/impls/aij/mpi/mpiaij.h>

1681: PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1682: {
1683:   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1684:   Mat          B;
1685:   Mat_MPIAIJ  *b;

1687:   PetscFunctionBegin;
1688:   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

1690:   if (reuse == MAT_REUSE_MATRIX) {
1691:     B = *newmat;
1692:   } else {
1693:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1694:     PetscCall(MatSetType(B, MATMPIAIJ));
1695:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1696:     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
1697:     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
1698:     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
1699:   }
1700:   b = (Mat_MPIAIJ *)B->data;

1702:   if (reuse == MAT_REUSE_MATRIX) {
1703:     PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
1704:     PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
1705:   } else {
1706:     PetscCall(MatDestroy(&b->A));
1707:     PetscCall(MatDestroy(&b->B));
1708:     PetscCall(MatDisAssemble_MPISELL(A));
1709:     PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
1710:     PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
1711:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1712:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1713:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1714:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1715:   }

1717:   if (reuse == MAT_INPLACE_MATRIX) {
1718:     PetscCall(MatHeaderReplace(A, &B));
1719:   } else {
1720:     *newmat = B;
1721:   }
1722:   PetscFunctionReturn(PETSC_SUCCESS);
1723: }
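/*
  This conversion is registered below (in MatCreate_MPISELL()) as "MatConvert_mpisell_mpiaij_C"
  and is normally reached through the user-level MatConvert(). A sketch of that call
  (names illustrative, error checking omitted):

     Mat B;
     MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
*/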

1725: PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1726: {
1727:   Mat_MPIAIJ  *a = (Mat_MPIAIJ *)A->data;
1728:   Mat          B;
1729:   Mat_MPISELL *b;

1731:   PetscFunctionBegin;
1732:   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

1734:   if (reuse == MAT_REUSE_MATRIX) {
1735:     B = *newmat;
1736:   } else {
1737:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data;
1738:     PetscInt    i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n;
1739:     PetscInt   *d_nnz, *o_nnz;
1740:     PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz));
1741:     for (i = 0; i < lm; i++) {
1742:       d_nnz[i] = Aa->i[i + 1] - Aa->i[i];
1743:       o_nnz[i] = Ba->i[i + 1] - Ba->i[i];
1744:       if (d_nnz[i] > d_nz) d_nz = d_nnz[i];
1745:       if (o_nnz[i] > o_nz) o_nz = o_nnz[i];
1746:     }
1747:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1748:     PetscCall(MatSetType(B, MATMPISELL));
1749:     PetscCall(MatSetSizes(B, lm, ln, m, n));
1750:     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
1751:     PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz));
1752:     PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz));
1753:     PetscCall(PetscFree2(d_nnz, o_nnz));
1754:   }
1755:   b = (Mat_MPISELL *)B->data;

1757:   if (reuse == MAT_REUSE_MATRIX) {
1758:     PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A));
1759:     PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B));
1760:   } else {
1761:     PetscCall(MatDestroy(&b->A));
1762:     PetscCall(MatDestroy(&b->B));
1763:     PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A));
1764:     PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B));
1765:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1766:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1767:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1768:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1769:   }

1771:   if (reuse == MAT_INPLACE_MATRIX) {
1772:     PetscCall(MatHeaderReplace(A, &B));
1773:   } else {
1774:     *newmat = B;
1775:   }
1776:   PetscFunctionReturn(PETSC_SUCCESS);
1777: }
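/*
  The reverse conversion, normally reached through MatConvert() on an assembled MATMPIAIJ
  matrix. A sketch (names illustrative, error checking omitted):

     Mat S;
     MatConvert(A,MATMPISELL,MAT_INITIAL_MATRIX,&S);
*/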

1779: PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1780: {
1781:   Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
1782:   Vec          bb1 = NULL;

1784:   PetscFunctionBegin;
1785:   if (flag == SOR_APPLY_UPPER) {
1786:     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1787:     PetscFunctionReturn(PETSC_SUCCESS);
1788:   }

1790:   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

1792:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1793:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1794:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1795:       its--;
1796:     }

1798:     while (its--) {
1799:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1800:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

1802:       /* update rhs: bb1 = bb - B*x */
1803:       PetscCall(VecScale(mat->lvec, -1.0));
1804:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

1806:       /* local sweep */
1807:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1808:     }
1809:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1810:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1811:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1812:       its--;
1813:     }
1814:     while (its--) {
1815:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1816:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

1818:       /* update rhs: bb1 = bb - B*x */
1819:       PetscCall(VecScale(mat->lvec, -1.0));
1820:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

1822:       /* local sweep */
1823:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1824:     }
1825:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1826:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1827:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1828:       its--;
1829:     }
1830:     while (its--) {
1831:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1832:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

1834:       /* update rhs: bb1 = bb - B*x */
1835:       PetscCall(VecScale(mat->lvec, -1.0));
1836:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

1838:       /* local sweep */
1839:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1840:     }
1841:   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

1843:   PetscCall(VecDestroy(&bb1));

1845:   matin->factorerrortype = mat->A->factorerrortype;
1846:   PetscFunctionReturn(PETSC_SUCCESS);
1847: }
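/*
  MatSOR_MPISELL() supports only the processor-local sweeps (SOR_LOCAL_FORWARD_SWEEP,
  SOR_LOCAL_BACKWARD_SWEEP, SOR_LOCAL_SYMMETRIC_SWEEP); the off-processor coupling is
  folded into the right-hand side as bb1 = bb - B*x before each local sweep. It is
  typically reached through the SOR preconditioner, for example with the (illustrative)
  options

     -ksp_type richardson -pc_type sor
*/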

1849: #if defined(PETSC_HAVE_CUDA)
1850: PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *);
1851: #endif

1853: /*MC
1854:    MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for parallel sparse matrices.

1856:    Options Database Key:
1857: . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`

1859:   Level: beginner

1861: .seealso: `Mat`, `MATSELL`, `MATSEQSELL`, `MatCreateSELL()`
1862: M*/
1863: PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B)
1864: {
1865:   Mat_MPISELL *b;
1866:   PetscMPIInt  size;

1868:   PetscFunctionBegin;
1869:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1870:   PetscCall(PetscNew(&b));
1871:   B->data       = (void *)b;
1872:   B->ops[0]     = MatOps_Values;
1873:   B->assembled  = PETSC_FALSE;
1874:   B->insertmode = NOT_SET_VALUES;
1875:   b->size       = size;
1876:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
1877:   /* build cache for off array entries formed */
1878:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

1880:   b->donotstash  = PETSC_FALSE;
1881:   b->colmap      = NULL;
1882:   b->garray      = NULL;
1883:   b->roworiented = PETSC_TRUE;

1885:   /* stuff used for matrix vector multiply */
1886:   b->lvec  = NULL;
1887:   b->Mvctx = NULL;

1889:   /* stuff for MatGetRow() */
1890:   b->rowindices   = NULL;
1891:   b->rowvalues    = NULL;
1892:   b->getrowactive = PETSC_FALSE;

1894:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL));
1895:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL));
1896:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL));
1897:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL));
1898:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
1899: #if defined(PETSC_HAVE_CUDA)
1900:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA));
1901: #endif
1902:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL));
1903:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL));
1904:   PetscFunctionReturn(PETSC_SUCCESS);
1905: }
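/*
  The PetscObjectComposeFunction() calls above are what let type-generic routines such as
  MatMPISELLSetPreallocation() and MatConvert() reach the implementations in this file:
  they query the composed "..._C" function on the object and dispatch to it. A sketch of
  that dispatch, mirroring the PetscTryMethod() used in MatMPISELLSetPreallocation():

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     PetscObjectQueryFunction((PetscObject)B,"MatMPISELLSetPreallocation_C",&f);
     if (f) (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
*/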