Actual source code: mpiaij.c

  1: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petsc/private/sfimpl.h>
  4: #include <petsc/private/isimpl.h>
  5: #include <petscblaslapack.h>
  6: #include <petscsf.h>
  7: #include <petsc/private/hashmapi.h>

  9: PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
 10: {
 11:   Mat B;

 13:   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B);
 14:   PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B);
 15:   MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done);
 16:   MatDestroy(&B);
 17:   return 0;
 18: }

 20: PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
 21: {
 22:   Mat B;

 24:   PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B);
 25:   MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done);
 26:   PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL);
 27:   return 0;
 28: }
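/*
   A minimal usage sketch of the get/restore pair implemented above, assuming A is an
   assembled MPIAIJ matrix (the merged local matrix B is created and cached internally):

     PetscInt        nlocal;
     const PetscInt *ia, *ja;
     PetscBool       done;

     MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &nlocal, &ia, &ja, &done);
     if (done) {
       ... inspect the CSR arrays ia[] and ja[] of the local rows ...
     }
     MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &nlocal, &ia, &ja, &done);
*/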

 30: /*MC
 31:    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

 33:    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
 34:    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
 35:   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
 36:   for communicators controlling multiple processes.  It is recommended that you call both of
 37:   the above preallocation routines for simplicity.

 39:    Options Database Keys:
 40: . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

 42:   Developer Note:
 43:    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. This type also automatically
 44:    switches over to using inodes when enough of them exist.

 46:   Level: beginner

 48: .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
 49: M*/
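/*
   A minimal creation sketch following the recommendation above; M, N and the
   preallocation counts (5 diagonal and 2 off-diagonal nonzeros per row) are
   placeholders chosen for illustration:

     Mat A;

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
     MatSetType(A, MATAIJ);
     MatSeqAIJSetPreallocation(A, 5, NULL);
     MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);
*/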

 51: /*MC
 52:    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

 54:    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
 55:    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
 56:    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
 57:   for communicators controlling multiple processes.  It is recommended that you call both of
 58:   the above preallocation routines for simplicity.

 60:    Options Database Keys:
 61: . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

 63:   Level: beginner

 65: .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
 66: M*/

 68: static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
 69: {
 70:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

 72: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
 73:   A->boundtocpu = flg;
 74: #endif
 75:   if (a->A) MatBindToCPU(a->A, flg);
 76:   if (a->B) MatBindToCPU(a->B, flg);

 78:   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
 79:    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
 80:    * to differ from the parent matrix. */
 81:   if (a->lvec) VecBindToCPU(a->lvec, flg);
 82:   if (a->diag) VecBindToCPU(a->diag, flg);

 84:   return 0;
 85: }
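/*
   A brief usage sketch, assuming A is a device-capable parallel AIJ matrix (for example
   created with -mat_type aijcusparse); as implemented above, binding the parallel matrix
   also binds its diagonal block, its off-diagonal block, and the associated work vectors:

     MatBindToCPU(A, PETSC_TRUE);    force subsequent operations to run on the CPU
     ...
     MatBindToCPU(A, PETSC_FALSE);   allow offloading to the device again
*/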

 87: PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
 88: {
 89:   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

 91:   if (mat->A) {
 92:     MatSetBlockSizes(mat->A, rbs, cbs);
 93:     MatSetBlockSizes(mat->B, rbs, 1);
 94:   }
 95:   return 0;
 96: }

 98: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
 99: {
100:   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
101:   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
102:   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
103:   const PetscInt  *ia, *ib;
104:   const MatScalar *aa, *bb, *aav, *bav;
105:   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
106:   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

108:   *keptrows = NULL;

110:   ia = a->i;
111:   ib = b->i;
112:   MatSeqAIJGetArrayRead(mat->A, &aav);
113:   MatSeqAIJGetArrayRead(mat->B, &bav);
114:   for (i = 0; i < m; i++) {
115:     na = ia[i + 1] - ia[i];
116:     nb = ib[i + 1] - ib[i];
117:     if (!na && !nb) {
118:       cnt++;
119:       goto ok1;
120:     }
121:     aa = aav + ia[i];
122:     for (j = 0; j < na; j++) {
123:       if (aa[j] != 0.0) goto ok1;
124:     }
125:     bb = bav + ib[i];
126:     for (j = 0; j < nb; j++) {
127:       if (bb[j] != 0.0) goto ok1;
128:     }
129:     cnt++;
130:   ok1:;
131:   }
132:   MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M));
133:   if (!n0rows) {
134:     MatSeqAIJRestoreArrayRead(mat->A, &aav);
135:     MatSeqAIJRestoreArrayRead(mat->B, &bav);
136:     return 0;
137:   }
138:   PetscMalloc1(M->rmap->n - cnt, &rows);
139:   cnt = 0;
140:   for (i = 0; i < m; i++) {
141:     na = ia[i + 1] - ia[i];
142:     nb = ib[i + 1] - ib[i];
143:     if (!na && !nb) continue;
144:     aa = aav + ia[i];
145:     for (j = 0; j < na; j++) {
146:       if (aa[j] != 0.0) {
147:         rows[cnt++] = rstart + i;
148:         goto ok2;
149:       }
150:     }
151:     bb = bav + ib[i];
152:     for (j = 0; j < nb; j++) {
153:       if (bb[j] != 0.0) {
154:         rows[cnt++] = rstart + i;
155:         goto ok2;
156:       }
157:     }
158:   ok2:;
159:   }
160:   ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows);
161:   MatSeqAIJRestoreArrayRead(mat->A, &aav);
162:   MatSeqAIJRestoreArrayRead(mat->B, &bav);
163:   return 0;
164: }

166: PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
167: {
168:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
169:   PetscBool   cong;

171:   MatHasCongruentLayouts(Y, &cong);
172:   if (Y->assembled && cong) {
173:     MatDiagonalSet(aij->A, D, is);
174:   } else {
175:     MatDiagonalSet_Default(Y, D, is);
176:   }
177:   return 0;
178: }

180: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
181: {
182:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
183:   PetscInt    i, rstart, nrows, *rows;

185:   *zrows = NULL;
186:   MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows);
187:   MatGetOwnershipRange(M, &rstart, NULL);
188:   for (i = 0; i < nrows; i++) rows[i] += rstart;
189:   ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows);
190:   return 0;
191: }

193: PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
194: {
195:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
196:   PetscInt           i, m, n, *garray = aij->garray;
197:   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
198:   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
199:   PetscReal         *work;
200:   const PetscScalar *dummy;

202:   MatGetSize(A, &m, &n);
203:   PetscCalloc1(n, &work);
204:   MatSeqAIJGetArrayRead(aij->A, &dummy);
205:   MatSeqAIJRestoreArrayRead(aij->A, &dummy);
206:   MatSeqAIJGetArrayRead(aij->B, &dummy);
207:   MatSeqAIJRestoreArrayRead(aij->B, &dummy);
208:   if (type == NORM_2) {
209:     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
210:     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
211:   } else if (type == NORM_1) {
212:     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
213:     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
214:   } else if (type == NORM_INFINITY) {
215:     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
216:     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
217:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
218:     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
219:     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
220:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
221:     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
222:     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
223:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
224:   if (type == NORM_INFINITY) {
225:     MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A));
226:   } else {
227:     MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
228:   }
229:   PetscFree(work);
230:   if (type == NORM_2) {
231:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
232:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
233:     for (i = 0; i < n; i++) reductions[i] /= m;
234:   }
235:   return 0;
236: }

238: PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
239: {
240:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
241:   IS              sis, gis;
242:   const PetscInt *isis, *igis;
243:   PetscInt        n, *iis, nsis, ngis, rstart, i;

245:   MatFindOffBlockDiagonalEntries(a->A, &sis);
246:   MatFindNonzeroRows(a->B, &gis);
247:   ISGetSize(gis, &ngis);
248:   ISGetSize(sis, &nsis);
249:   ISGetIndices(sis, &isis);
250:   ISGetIndices(gis, &igis);

252:   PetscMalloc1(ngis + nsis, &iis);
253:   PetscArraycpy(iis, igis, ngis);
254:   PetscArraycpy(iis + ngis, isis, nsis);
255:   n = ngis + nsis;
256:   PetscSortRemoveDupsInt(&n, iis);
257:   MatGetOwnershipRange(A, &rstart, NULL);
258:   for (i = 0; i < n; i++) iis[i] += rstart;
259:   ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is);

261:   ISRestoreIndices(sis, &isis);
262:   ISRestoreIndices(gis, &igis);
263:   ISDestroy(&sis);
264:   ISDestroy(&gis);
265:   return 0;
266: }

268: /*
269:   Local utility routine that creates a mapping from the global column
270: number to the local number in the off-diagonal part of the local
271: storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable, at a
272: slightly higher hash-table lookup cost; without it, it is not scalable (each
273: process stores an integer array of order N) but access is fast. A lookup sketch follows the routine below.
274: */
275: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
276: {
277:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
278:   PetscInt    n   = aij->B->cmap->n, i;

281: #if defined(PETSC_USE_CTABLE)
282:   PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap);
283:   for (i = 0; i < n; i++) PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES);
284: #else
285:   PetscCalloc1(mat->cmap->N + 1, &aij->colmap);
286:   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
287: #endif
288:   return 0;
289: }
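/*
   A lookup sketch for the colmap built above, mirroring its use in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ(); gcol is a global column index that falls outside this
   process's diagonal block, and the +1/-1 shift lets a stored 0 (so -1 after the
   decrement) signal "not present in B":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap, gcol + 1, &lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ... gcol is not a column of the off-diagonal part B ...
*/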

291: #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
292:   { \
293:     if (col <= lastcol1) low1 = 0; \
294:     else high1 = nrow1; \
295:     lastcol1 = col; \
296:     while (high1 - low1 > 5) { \
297:       t = (low1 + high1) / 2; \
298:       if (rp1[t] > col) high1 = t; \
299:       else low1 = t; \
300:     } \
301:     for (_i = low1; _i < high1; _i++) { \
302:       if (rp1[_i] > col) break; \
303:       if (rp1[_i] == col) { \
304:         if (addv == ADD_VALUES) { \
305:           ap1[_i] += value; \
306:           /* Not sure whether PetscLogFlops() will slow down the code */ \
307:           (void)PetscLogFlops(1.0); \
308:         } else ap1[_i] = value; \
309:         goto a_noinsert; \
310:       } \
311:     } \
312:     if (value == 0.0 && ignorezeroentries && row != col) { \
313:       low1  = 0; \
314:       high1 = nrow1; \
315:       goto a_noinsert; \
316:     } \
317:     if (nonew == 1) { \
318:       low1  = 0; \
319:       high1 = nrow1; \
320:       goto a_noinsert; \
321:     } \
323:     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
324:     N = nrow1++ - 1; \
325:     a->nz++; \
326:     high1++; \
327:     /* shift up all the later entries in this row */ \
328:     PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1); \
329:     PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1); \
330:     rp1[_i] = col; \
331:     ap1[_i] = value; \
332:     A->nonzerostate++; \
333:   a_noinsert:; \
334:     ailen[row] = nrow1; \
335:   }

337: #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
338:   { \
339:     if (col <= lastcol2) low2 = 0; \
340:     else high2 = nrow2; \
341:     lastcol2 = col; \
342:     while (high2 - low2 > 5) { \
343:       t = (low2 + high2) / 2; \
344:       if (rp2[t] > col) high2 = t; \
345:       else low2 = t; \
346:     } \
347:     for (_i = low2; _i < high2; _i++) { \
348:       if (rp2[_i] > col) break; \
349:       if (rp2[_i] == col) { \
350:         if (addv == ADD_VALUES) { \
351:           ap2[_i] += value; \
352:           (void)PetscLogFlops(1.0); \
353:         } else ap2[_i] = value; \
354:         goto b_noinsert; \
355:       } \
356:     } \
357:     if (value == 0.0 && ignorezeroentries) { \
358:       low2  = 0; \
359:       high2 = nrow2; \
360:       goto b_noinsert; \
361:     } \
362:     if (nonew == 1) { \
363:       low2  = 0; \
364:       high2 = nrow2; \
365:       goto b_noinsert; \
366:     } \
368:     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
369:     N = nrow2++ - 1; \
370:     b->nz++; \
371:     high2++; \
372:     /* shift up all the later entries in this row */ \
373:     PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1); \
374:     PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1); \
375:     rp2[_i] = col; \
376:     ap2[_i] = value; \
377:     B->nonzerostate++; \
378:   b_noinsert:; \
379:     bilen[row] = nrow2; \
380:   }

382: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
383: {
384:   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
385:   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
386:   PetscInt     l, *garray                         = mat->garray, diag;
387:   PetscScalar *aa, *ba;

389:   /* code only works for square matrices A */

391:   /* find size of row to the left of the diagonal part */
392:   MatGetOwnershipRange(A, &diag, NULL);
393:   row = row - diag;
394:   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
395:     if (garray[b->j[b->i[row] + l]] > diag) break;
396:   }
397:   if (l) {
398:     MatSeqAIJGetArray(mat->B, &ba);
399:     PetscArraycpy(ba + b->i[row], v, l);
400:     MatSeqAIJRestoreArray(mat->B, &ba);
401:   }

403:   /* diagonal part */
404:   if (a->i[row + 1] - a->i[row]) {
405:     MatSeqAIJGetArray(mat->A, &aa);
406:     PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]));
407:     MatSeqAIJRestoreArray(mat->A, &aa);
408:   }

410:   /* right of diagonal part */
411:   if (b->i[row + 1] - b->i[row] - l) {
412:     MatSeqAIJGetArray(mat->B, &ba);
413:     PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l);
414:     MatSeqAIJRestoreArray(mat->B, &ba);
415:   }
416:   return 0;
417: }
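/*
   A sketch of the ordering v[] must follow for the routine above, for one locally owned
   row of a square matrix; the values appear in ascending global column order, matching
   how the row is split across mat->B (left part), mat->A, and mat->B (right part):

     v = [ off-diagonal entries with global column < rstart,
           diagonal-block entries (rstart <= column < rend),
           off-diagonal entries with global column >= rend ]
*/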

419: PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
420: {
421:   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
422:   PetscScalar value = 0.0;
423:   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
424:   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
425:   PetscBool   roworiented = aij->roworiented;

427:   /* Some variables required by the macros below */
428:   Mat         A     = aij->A;
429:   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
430:   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
431:   PetscBool   ignorezeroentries = a->ignorezeroentries;
432:   Mat         B                 = aij->B;
433:   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
434:   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
435:   MatScalar  *aa, *ba;
436:   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
437:   PetscInt    nonew;
438:   MatScalar  *ap1, *ap2;

440:   MatSeqAIJGetArray(A, &aa);
441:   MatSeqAIJGetArray(B, &ba);
442:   for (i = 0; i < m; i++) {
443:     if (im[i] < 0) continue;
445:     if (im[i] >= rstart && im[i] < rend) {
446:       row      = im[i] - rstart;
447:       lastcol1 = -1;
448:       rp1      = aj + ai[row];
449:       ap1      = aa + ai[row];
450:       rmax1    = aimax[row];
451:       nrow1    = ailen[row];
452:       low1     = 0;
453:       high1    = nrow1;
454:       lastcol2 = -1;
455:       rp2      = bj + bi[row];
456:       ap2      = ba + bi[row];
457:       rmax2    = bimax[row];
458:       nrow2    = bilen[row];
459:       low2     = 0;
460:       high2    = nrow2;

462:       for (j = 0; j < n; j++) {
463:         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
464:         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
465:         if (in[j] >= cstart && in[j] < cend) {
466:           col   = in[j] - cstart;
467:           nonew = a->nonew;
468:           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
469:         } else if (in[j] < 0) {
470:           continue;
471:         } else {
473:           if (mat->was_assembled) {
474:             if (!aij->colmap) MatCreateColmap_MPIAIJ_Private(mat);
475: #if defined(PETSC_USE_CTABLE)
476:             PetscTableFind(aij->colmap, in[j] + 1, &col); /* map global col ids to local ones */
477:             col--;
478: #else
479:             col = aij->colmap[in[j]] - 1;
480: #endif
481:             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
482:               MatDisAssemble_MPIAIJ(mat);                 /* Change aij->B from reduced/local format to expanded/global format */
483:               col = in[j];
484:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
485:               B     = aij->B;
486:               b     = (Mat_SeqAIJ *)B->data;
487:               bimax = b->imax;
488:               bi    = b->i;
489:               bilen = b->ilen;
490:               bj    = b->j;
491:               ba    = b->a;
492:               rp2   = bj + bi[row];
493:               ap2   = ba + bi[row];
494:               rmax2 = bimax[row];
495:               nrow2 = bilen[row];
496:               low2  = 0;
497:               high2 = nrow2;
498:               bm    = aij->B->rmap->n;
499:               ba    = b->a;
500:             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
501:               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
502:                 PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]);
503:               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
504:             }
505:           } else col = in[j];
506:           nonew = b->nonew;
507:           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
508:         }
509:       }
510:     } else {
512:       if (!aij->donotstash) {
513:         mat->assembled = PETSC_FALSE;
514:         if (roworiented) {
515:           MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
516:         } else {
517:           MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
518:         }
519:       }
520:     }
521:   }
522:   MatSeqAIJRestoreArray(A, &aa); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
523:   MatSeqAIJRestoreArray(B, &ba);
524:   return 0;
525: }

527: /*
528:     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
529:     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
530:     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. A small worked example follows the routine below.
531: */
532: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
533: {
534:   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
535:   Mat         A      = aij->A; /* diagonal part of the matrix */
536:   Mat         B      = aij->B; /* offdiagonal part of the matrix */
537:   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
538:   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
539:   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
540:   PetscInt   *ailen = a->ilen, *aj = a->j;
541:   PetscInt   *bilen = b->ilen, *bj = b->j;
542:   PetscInt    am          = aij->A->rmap->n, j;
543:   PetscInt    diag_so_far = 0, dnz;
544:   PetscInt    offd_so_far = 0, onz;

546:   /* Iterate over all rows of the matrix */
547:   for (j = 0; j < am; j++) {
548:     dnz = onz = 0;
549:     /*  Iterate over all non-zero columns of the current row */
550:     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
551:       /* If column is in the diagonal */
552:       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553:         aj[diag_so_far++] = mat_j[col] - cstart;
554:         dnz++;
555:       } else { /* off-diagonal entries */
556:         bj[offd_so_far++] = mat_j[col];
557:         onz++;
558:       }
559:     }
560:     ailen[j] = dnz;
561:     bilen[j] = onz;
562:   }
563:   return 0;
564: }
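/*
   A small worked example of the split performed above, assuming cstart = 4 and cend = 8
   on this process: a row with sorted global columns {1, 5, 7, 9} contributes
   aj = {1, 3} to the diagonal part (the shifted columns 5-4 and 7-4, so ailen = 2) and
   bj = {1, 9} to the off-diagonal part (columns kept global here, so bilen = 2).
*/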

566: /*
567:     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568:     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
569:     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
570:     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571:     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
572: */
573: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
574: {
575:   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
576:   Mat          A    = aij->A; /* diagonal part of the matrix */
577:   Mat          B    = aij->B; /* offdiagonal part of the matrix */
578:   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
579:   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
580:   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
581:   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
582:   PetscInt    *ailen = a->ilen, *aj = a->j;
583:   PetscInt    *bilen = b->ilen, *bj = b->j;
584:   PetscInt     am          = aij->A->rmap->n, j;
585:   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586:   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
587:   PetscScalar *aa = a->a, *ba = b->a;

589:   /* Iterate over all rows of the matrix */
590:   for (j = 0; j < am; j++) {
591:     dnz_row = onz_row = 0;
592:     rowstart_offd     = full_offd_i[j];
593:     rowstart_diag     = full_diag_i[j];
594:     /*  Iterate over all non-zero columns of the current row */
595:     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
596:       /* If column is in the diagonal */
597:       if (mat_j[col] >= cstart && mat_j[col] < cend) {
598:         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
599:         aa[rowstart_diag + dnz_row] = mat_a[col];
600:         dnz_row++;
601:       } else { /* off-diagonal entries */
602:         bj[rowstart_offd + onz_row] = mat_j[col];
603:         ba[rowstart_offd + onz_row] = mat_a[col];
604:         onz_row++;
605:       }
606:     }
607:     ailen[j] = dnz_row;
608:     bilen[j] = onz_row;
609:   }
610:   return 0;
611: }

613: PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
614: {
615:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
616:   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
617:   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

619:   for (i = 0; i < m; i++) {
620:     if (idxm[i] < 0) continue; /* negative row */
622:     if (idxm[i] >= rstart && idxm[i] < rend) {
623:       row = idxm[i] - rstart;
624:       for (j = 0; j < n; j++) {
625:         if (idxn[j] < 0) continue; /* negative column */
627:         if (idxn[j] >= cstart && idxn[j] < cend) {
628:           col = idxn[j] - cstart;
629:           MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j);
630:         } else {
631:           if (!aij->colmap) MatCreateColmap_MPIAIJ_Private(mat);
632: #if defined(PETSC_USE_CTABLE)
633:           PetscTableFind(aij->colmap, idxn[j] + 1, &col);
634:           col--;
635: #else
636:           col = aij->colmap[idxn[j]] - 1;
637: #endif
638:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
639:           else MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j);
640:         }
641:       }
642:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
643:   }
644:   return 0;
645: }

647: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
648: {
649:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
650:   PetscInt    nstash, reallocs;

652:   if (aij->donotstash || mat->nooffprocentries) return 0;

654:   MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
655:   MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
656:   PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
657:   return 0;
658: }

660: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
661: {
662:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
663:   PetscMPIInt  n;
664:   PetscInt     i, j, rstart, ncols, flg;
665:   PetscInt    *row, *col;
666:   PetscBool    other_disassembled;
667:   PetscScalar *val;

669:   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

671:   if (!aij->donotstash && !mat->nooffprocentries) {
672:     while (1) {
673:       MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
674:       if (!flg) break;

676:       for (i = 0; i < n;) {
677:         /* Now identify the consecutive vals belonging to the same row */
678:         for (j = i, rstart = row[j]; j < n; j++) {
679:           if (row[j] != rstart) break;
680:         }
681:         if (j < n) ncols = j - i;
682:         else ncols = n - i;
683:         /* Now assemble all these values with a single function call */
684:         MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
685:         i = j;
686:       }
687:     }
688:     MatStashScatterEnd_Private(&mat->stash);
689:   }
690: #if defined(PETSC_HAVE_DEVICE)
691:   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
692:   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
693:   if (mat->boundtocpu) {
694:     MatBindToCPU(aij->A, PETSC_TRUE);
695:     MatBindToCPU(aij->B, PETSC_TRUE);
696:   }
697: #endif
698:   MatAssemblyBegin(aij->A, mode);
699:   MatAssemblyEnd(aij->A, mode);

701:   /* determine if any processor has disassembled; if so, we must
702:      also disassemble ourselves so that we may reassemble. */
703:   /*
704:      if the nonzero structure of submatrix B cannot change then we know that
705:      no processor disassembled, thus we can skip this step
706:   */
707:   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
708:     MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat));
709:     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
710:       MatDisAssemble_MPIAIJ(mat);
711:     }
712:   }
713:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) MatSetUpMultiply_MPIAIJ(mat);
714:   MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE);
715: #if defined(PETSC_HAVE_DEVICE)
716:   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
717: #endif
718:   MatAssemblyBegin(aij->B, mode);
719:   MatAssemblyEnd(aij->B, mode);

721:   PetscFree2(aij->rowvalues, aij->rowindices);

723:   aij->rowvalues = NULL;

725:   VecDestroy(&aij->diag);

727:   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
728:   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
729:     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
730:     MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat));
731:   }
732: #if defined(PETSC_HAVE_DEVICE)
733:   mat->offloadmask = PETSC_OFFLOAD_BOTH;
734: #endif
735:   return 0;
736: }

738: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
739: {
740:   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

742:   MatZeroEntries(l->A);
743:   MatZeroEntries(l->B);
744:   return 0;
745: }

747: PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
748: {
749:   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
750:   PetscObjectState sA, sB;
751:   PetscInt        *lrows;
752:   PetscInt         r, len;
753:   PetscBool        cong, lch, gch;

755:   /* get locally owned rows */
756:   MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows);
757:   MatHasCongruentLayouts(A, &cong);
758:   /* fix right hand side if needed */
759:   if (x && b) {
760:     const PetscScalar *xx;
761:     PetscScalar       *bb;

764:     VecGetArrayRead(x, &xx);
765:     VecGetArray(b, &bb);
766:     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
767:     VecRestoreArrayRead(x, &xx);
768:     VecRestoreArray(b, &bb);
769:   }

771:   sA = mat->A->nonzerostate;
772:   sB = mat->B->nonzerostate;

774:   if (diag != 0.0 && cong) {
775:     MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
776:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
777:   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
778:     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
779:     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
780:     PetscInt    nnwA, nnwB;
781:     PetscBool   nnzA, nnzB;

783:     nnwA = aijA->nonew;
784:     nnwB = aijB->nonew;
785:     nnzA = aijA->keepnonzeropattern;
786:     nnzB = aijB->keepnonzeropattern;
787:     if (!nnzA) {
788:       PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");
789:       aijA->nonew = 0;
790:     }
791:     if (!nnzB) {
792:       PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");
793:       aijB->nonew = 0;
794:     }
795:     /* Must zero here before the next loop */
796:     MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
797:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
798:     for (r = 0; r < len; ++r) {
799:       const PetscInt row = lrows[r] + A->rmap->rstart;
800:       if (row >= A->cmap->N) continue;
801:       MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
802:     }
803:     aijA->nonew = nnwA;
804:     aijB->nonew = nnwB;
805:   } else {
806:     MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
807:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
808:   }
809:   PetscFree(lrows);
810:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
811:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

813:   /* reduce nonzerostate */
814:   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
815:   MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A));
816:   if (gch) A->nonzerostate++;
817:   return 0;
818: }

820: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
821: {
822:   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
823:   PetscMPIInt        n = A->rmap->n;
824:   PetscInt           i, j, r, m, len = 0;
825:   PetscInt          *lrows, *owners = A->rmap->range;
826:   PetscMPIInt        p = 0;
827:   PetscSFNode       *rrows;
828:   PetscSF            sf;
829:   const PetscScalar *xx;
830:   PetscScalar       *bb, *mask, *aij_a;
831:   Vec                xmask, lmask;
832:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
833:   const PetscInt    *aj, *ii, *ridx;
834:   PetscScalar       *aa;

836:   /* Create SF where leaves are input rows and roots are owned rows */
837:   PetscMalloc1(n, &lrows);
838:   for (r = 0; r < n; ++r) lrows[r] = -1;
839:   PetscMalloc1(N, &rrows);
840:   for (r = 0; r < N; ++r) {
841:     const PetscInt idx = rows[r];
843:     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
844:       PetscLayoutFindOwner(A->rmap, idx, &p);
845:     }
846:     rrows[r].rank  = p;
847:     rrows[r].index = rows[r] - owners[p];
848:   }
849:   PetscSFCreate(PetscObjectComm((PetscObject)A), &sf);
850:   PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
851:   /* Collect flags for rows to be zeroed */
852:   PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR);
853:   PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR);
854:   PetscSFDestroy(&sf);
855:   /* Compress and put in row numbers */
856:   for (r = 0; r < n; ++r)
857:     if (lrows[r] >= 0) lrows[len++] = r;
858:   /* zero diagonal part of matrix */
859:   MatZeroRowsColumns(l->A, len, lrows, diag, x, b);
860:   /* handle off diagonal part of matrix */
861:   MatCreateVecs(A, &xmask, NULL);
862:   VecDuplicate(l->lvec, &lmask);
863:   VecGetArray(xmask, &bb);
864:   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
865:   VecRestoreArray(xmask, &bb);
866:   VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD);
867:   VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD);
868:   VecDestroy(&xmask);
869:   if (x && b) { /* this code is buggy when the row and column layouts do not match */
870:     PetscBool cong;

872:     MatHasCongruentLayouts(A, &cong);
874:     VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD);
875:     VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD);
876:     VecGetArrayRead(l->lvec, &xx);
877:     VecGetArray(b, &bb);
878:   }
879:   VecGetArray(lmask, &mask);
880:   /* remove zeroed rows of off diagonal matrix */
881:   MatSeqAIJGetArray(l->B, &aij_a);
882:   ii = aij->i;
883:   for (i = 0; i < len; i++) PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]);
884:   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
885:   if (aij->compressedrow.use) {
886:     m    = aij->compressedrow.nrows;
887:     ii   = aij->compressedrow.i;
888:     ridx = aij->compressedrow.rindex;
889:     for (i = 0; i < m; i++) {
890:       n  = ii[i + 1] - ii[i];
891:       aj = aij->j + ii[i];
892:       aa = aij_a + ii[i];

894:       for (j = 0; j < n; j++) {
895:         if (PetscAbsScalar(mask[*aj])) {
896:           if (b) bb[*ridx] -= *aa * xx[*aj];
897:           *aa = 0.0;
898:         }
899:         aa++;
900:         aj++;
901:       }
902:       ridx++;
903:     }
904:   } else { /* do not use compressed row format */
905:     m = l->B->rmap->n;
906:     for (i = 0; i < m; i++) {
907:       n  = ii[i + 1] - ii[i];
908:       aj = aij->j + ii[i];
909:       aa = aij_a + ii[i];
910:       for (j = 0; j < n; j++) {
911:         if (PetscAbsScalar(mask[*aj])) {
912:           if (b) bb[i] -= *aa * xx[*aj];
913:           *aa = 0.0;
914:         }
915:         aa++;
916:         aj++;
917:       }
918:     }
919:   }
920:   if (x && b) {
921:     VecRestoreArray(b, &bb);
922:     VecRestoreArrayRead(l->lvec, &xx);
923:   }
924:   MatSeqAIJRestoreArray(l->B, &aij_a);
925:   VecRestoreArray(lmask, &mask);
926:   VecDestroy(&lmask);
927:   PetscFree(lrows);

929:   /* only change matrix nonzero state if pattern was allowed to be changed */
930:   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
931:     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
932:     MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A));
933:   }
934:   return 0;
935: }
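/*
   The multiply below follows the usual MPIAIJ pattern: start the scatter of the needed
   off-process entries of xx into a->lvec, apply the diagonal block a->A while that
   communication is in flight, complete the scatter, and finally add the contribution of
   the off-diagonal block a->B.
*/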

937: PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
938: {
939:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
940:   PetscInt    nt;
941:   VecScatter  Mvctx = a->Mvctx;

943:   VecGetLocalSize(xx, &nt);
945:   VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
946:   PetscUseTypeMethod(a->A, mult, xx, yy);
947:   VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
948:   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
949:   return 0;
950: }

952: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
953: {
954:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

956:   MatMultDiagonalBlock(a->A, bb, xx);
957:   return 0;
958: }

960: PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
961: {
962:   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
963:   VecScatter  Mvctx = a->Mvctx;

965:   VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
966:   (*a->A->ops->multadd)(a->A, xx, yy, zz);
967:   VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
968:   (*a->B->ops->multadd)(a->B, a->lvec, zz, zz);
969:   return 0;
970: }

972: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
973: {
974:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

976:   /* do nondiagonal part */
977:   (*a->B->ops->multtranspose)(a->B, xx, a->lvec);
978:   /* do local part */
979:   (*a->A->ops->multtranspose)(a->A, xx, yy);
980:   /* add partial results together */
981:   VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE);
982:   VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE);
983:   return 0;
984: }

986: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
987: {
988:   MPI_Comm    comm;
989:   Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
990:   Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
991:   IS          Me, Notme;
992:   PetscInt    M, N, first, last, *notme, i;
993:   PetscBool   lf;
994:   PetscMPIInt size;

996:   /* Easy test: symmetric diagonal block */
997:   Bij  = (Mat_MPIAIJ *)Bmat->data;
998:   Bdia = Bij->A;
999:   MatIsTranspose(Adia, Bdia, tol, &lf);
1000:   MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat));
1001:   if (!*f) return 0;
1002:   PetscObjectGetComm((PetscObject)Amat, &comm);
1003:   MPI_Comm_size(comm, &size);
1004:   if (size == 1) return 0;

1006:   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1007:   MatGetSize(Amat, &M, &N);
1008:   MatGetOwnershipRange(Amat, &first, &last);
1009:   PetscMalloc1(N - last + first, &notme);
1010:   for (i = 0; i < first; i++) notme[i] = i;
1011:   for (i = last; i < M; i++) notme[i - last + first] = i;
1012:   ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme);
1013:   ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me);
1014:   MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs);
1015:   Aoff = Aoffs[0];
1016:   MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs);
1017:   Boff = Boffs[0];
1018:   MatIsTranspose(Aoff, Boff, tol, f);
1019:   MatDestroyMatrices(1, &Aoffs);
1020:   MatDestroyMatrices(1, &Boffs);
1021:   ISDestroy(&Me);
1022:   ISDestroy(&Notme);
1023:   PetscFree(notme);
1024:   return 0;
1025: }

1027: PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1028: {
1029:   MatIsTranspose_MPIAIJ(A, A, tol, f);
1030:   return 0;
1031: }

1033: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1034: {
1035:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

1037:   /* do nondiagonal part */
1038:   (*a->B->ops->multtranspose)(a->B, xx, a->lvec);
1039:   /* do local part */
1040:   (*a->A->ops->multtransposeadd)(a->A, xx, yy, zz);
1041:   /* add partial results together */
1042:   VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE);
1043:   VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE);
1044:   return 0;
1045: }

1047: /*
1048:   This only works correctly for square matrices where the subblock A->A is the
1049:    diagonal block
1050: */
1051: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1052: {
1053:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

1057:   MatGetDiagonal(a->A, v);
1058:   return 0;
1059: }

1061: PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1062: {
1063:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

1065:   MatScale(a->A, aa);
1066:   MatScale(a->B, aa);
1067:   return 0;
1068: }

1070: /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1071: PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1072: {
1073:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

1075:   PetscSFDestroy(&aij->coo_sf);
1076:   PetscFree(aij->Aperm1);
1077:   PetscFree(aij->Bperm1);
1078:   PetscFree(aij->Ajmap1);
1079:   PetscFree(aij->Bjmap1);

1081:   PetscFree(aij->Aimap2);
1082:   PetscFree(aij->Bimap2);
1083:   PetscFree(aij->Aperm2);
1084:   PetscFree(aij->Bperm2);
1085:   PetscFree(aij->Ajmap2);
1086:   PetscFree(aij->Bjmap2);

1088:   PetscFree2(aij->sendbuf, aij->recvbuf);
1089:   PetscFree(aij->Cperm1);
1090:   return 0;
1091: }

1093: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1094: {
1095:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

1097: #if defined(PETSC_USE_LOG)
1098:   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1099: #endif
1100:   MatStashDestroy_Private(&mat->stash);
1101:   VecDestroy(&aij->diag);
1102:   MatDestroy(&aij->A);
1103:   MatDestroy(&aij->B);
1104: #if defined(PETSC_USE_CTABLE)
1105:   PetscTableDestroy(&aij->colmap);
1106: #else
1107:   PetscFree(aij->colmap);
1108: #endif
1109:   PetscFree(aij->garray);
1110:   VecDestroy(&aij->lvec);
1111:   VecScatterDestroy(&aij->Mvctx);
1112:   PetscFree2(aij->rowvalues, aij->rowindices);
1113:   PetscFree(aij->ld);

1115:   /* Free COO */
1116:   MatResetPreallocationCOO_MPIAIJ(mat);

1118:   PetscFree(mat->data);

1120:   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1121:   PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL);

1123:   PetscObjectChangeTypeName((PetscObject)mat, NULL);
1124:   PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL);
1125:   PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL);
1126:   PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL);
1127:   PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL);
1128:   PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL);
1129:   PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL);
1130:   PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL);
1131:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL);
1132:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL);
1133: #if defined(PETSC_HAVE_CUDA)
1134:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL);
1135: #endif
1136: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1137:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL);
1138: #endif
1139:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL);
1140: #if defined(PETSC_HAVE_ELEMENTAL)
1141:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL);
1142: #endif
1143: #if defined(PETSC_HAVE_SCALAPACK)
1144:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL);
1145: #endif
1146: #if defined(PETSC_HAVE_HYPRE)
1147:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL);
1148:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL);
1149: #endif
1150:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL);
1151:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL);
1152:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL);
1153:   PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL);
1154:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL);
1155:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL);
1156: #if defined(PETSC_HAVE_MKL_SPARSE)
1157:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL);
1158: #endif
1159:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL);
1160:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL);
1161:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL);
1162:   PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL);
1163:   PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL);
1164:   return 0;
1165: }

1167: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1168: {
1169:   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1170:   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1171:   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1172:   const PetscInt    *garray = aij->garray;
1173:   const PetscScalar *aa, *ba;
1174:   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1175:   PetscInt          *rowlens;
1176:   PetscInt          *colidxs;
1177:   PetscScalar       *matvals;

1179:   PetscViewerSetUp(viewer);

1181:   M  = mat->rmap->N;
1182:   N  = mat->cmap->N;
1183:   m  = mat->rmap->n;
1184:   rs = mat->rmap->rstart;
1185:   cs = mat->cmap->rstart;
1186:   nz = A->nz + B->nz;

1188:   /* write matrix header */
1189:   header[0] = MAT_FILE_CLASSID;
1190:   header[1] = M;
1191:   header[2] = N;
1192:   header[3] = nz;
1193:   MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat));
1194:   PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);

1196:   /* fill in and store row lengths  */
1197:   PetscMalloc1(m, &rowlens);
1198:   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1199:   PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT);
1200:   PetscFree(rowlens);

1202:   /* fill in and store column indices */
1203:   PetscMalloc1(nz, &colidxs);
1204:   for (cnt = 0, i = 0; i < m; i++) {
1205:     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1206:       if (garray[B->j[jb]] > cs) break;
1207:       colidxs[cnt++] = garray[B->j[jb]];
1208:     }
1209:     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1210:     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1211:   }
1213:   PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1214:   PetscFree(colidxs);

1216:   /* fill in and store nonzero values */
1217:   MatSeqAIJGetArrayRead(aij->A, &aa);
1218:   MatSeqAIJGetArrayRead(aij->B, &ba);
1219:   PetscMalloc1(nz, &matvals);
1220:   for (cnt = 0, i = 0; i < m; i++) {
1221:     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1222:       if (garray[B->j[jb]] > cs) break;
1223:       matvals[cnt++] = ba[jb];
1224:     }
1225:     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1226:     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1227:   }
1228:   MatSeqAIJRestoreArrayRead(aij->A, &aa);
1229:   MatSeqAIJRestoreArrayRead(aij->B, &ba);
1231:   PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1232:   PetscFree(matvals);

1234:   /* write block size option to the viewer's .info file */
1235:   MatView_Binary_BlockSizes(mat, viewer);
1236:   return 0;
1237: }
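/*
   The file layout produced above, sketched in the order it is written (integers are
   PetscInt, values are PetscScalar, in the viewer's binary format):

     header  : MAT_FILE_CLASSID, M, N, global number of nonzeros
     rowlens : number of nonzeros in each global row
     colidxs : global column indices, row by row, in ascending column order
     matvals : the matching nonzero values in the same order
*/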

1239: #include <petscdraw.h>
1240: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1241: {
1242:   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1243:   PetscMPIInt       rank = aij->rank, size = aij->size;
1244:   PetscBool         isdraw, iascii, isbinary;
1245:   PetscViewer       sviewer;
1246:   PetscViewerFormat format;

1248:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1249:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1250:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1251:   if (iascii) {
1252:     PetscViewerGetFormat(viewer, &format);
1253:     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1254:       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1255:       PetscMalloc1(size, &nz);
1256:       MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat));
1257:       for (i = 0; i < (PetscInt)size; i++) {
1258:         nmax = PetscMax(nmax, nz[i]);
1259:         nmin = PetscMin(nmin, nz[i]);
1260:         navg += nz[i];
1261:       }
1262:       PetscFree(nz);
1263:       navg = navg / size;
1264:       PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax);
1265:       return 0;
1266:     }
1267:     PetscViewerGetFormat(viewer, &format);
1268:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1269:       MatInfo   info;
1270:       PetscInt *inodes = NULL;

1272:       MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
1273:       MatGetInfo(mat, MAT_LOCAL, &info);
1274:       MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL);
1275:       PetscViewerASCIIPushSynchronized(viewer);
1276:       if (!inodes) {
1277:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1278:                                                      (double)info.memory));
1279:       } else {
1280:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1281:                                                      (double)info.memory));
1282:       }
1283:       MatGetInfo(aij->A, MAT_LOCAL, &info);
1284:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
1285:       MatGetInfo(aij->B, MAT_LOCAL, &info);
1286:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
1287:       PetscViewerFlush(viewer);
1288:       PetscViewerASCIIPopSynchronized(viewer);
1289:       PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n");
1290:       VecScatterView(aij->Mvctx, viewer);
1291:       return 0;
1292:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1293:       PetscInt inodecount, inodelimit, *inodes;
1294:       MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit);
1295:       if (inodes) {
1296:         PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit);
1297:       } else {
1298:         PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n");
1299:       }
1300:       return 0;
1301:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1302:       return 0;
1303:     }
1304:   } else if (isbinary) {
1305:     if (size == 1) {
1306:       PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name);
1307:       MatView(aij->A, viewer);
1308:     } else {
1309:       MatView_MPIAIJ_Binary(mat, viewer);
1310:     }
1311:     return 0;
1312:   } else if (iascii && size == 1) {
1313:     PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name);
1314:     MatView(aij->A, viewer);
1315:     return 0;
1316:   } else if (isdraw) {
1317:     PetscDraw draw;
1318:     PetscBool isnull;
1319:     PetscViewerDrawGetDraw(viewer, 0, &draw);
1320:     PetscDrawIsNull(draw, &isnull);
1321:     if (isnull) return 0;
1322:   }

1324:   { /* assemble the entire matrix onto first processor */
1325:     Mat A = NULL, Av;
1326:     IS  isrow, iscol;

1328:     ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow);
1329:     ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol);
1330:     MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A);
1331:     MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL);
1332:     /*  The commented code uses MatCreateSubMatrices instead */
1333:     /*
1334:     Mat *AA, A = NULL, Av;
1335:     IS  isrow,iscol;

1337:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);
1338:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);
1339:     MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);
1340:     if (rank == 0) {
1341:        PetscObjectReference((PetscObject)AA[0]);
1342:        A    = AA[0];
1343:        Av   = AA[0];
1344:     }
1345:     MatDestroySubMatrices(1,&AA);
1346: */
1347:     ISDestroy(&iscol);
1348:     ISDestroy(&isrow);
1349:     /*
1350:        Every process has to participate in drawing the matrix since the graphics waits are
1351:        synchronized across all processors that share the PetscDraw object
1352:     */
1353:     PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
1354:     if (rank == 0) {
1355:       if (((PetscObject)mat)->name) PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name);
1356:       MatView_SeqAIJ(Av, sviewer);
1357:     }
1358:     PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
1359:     PetscViewerFlush(viewer);
1360:     MatDestroy(&A);
1361:   }
1362:   return 0;
1363: }

1365: PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1366: {
1367:   PetscBool iascii, isdraw, issocket, isbinary;

1369:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1370:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1371:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1372:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
1373:   if (iascii || isdraw || isbinary || issocket) MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer);
1374:   return 0;
1375: }

1377: PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1378: {
1379:   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1380:   Vec         bb1 = NULL;
1381:   PetscBool   hasop;

1383:   if (flag == SOR_APPLY_UPPER) {
1384:     (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
1385:     return 0;
1386:   }

1388:   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) VecDuplicate(bb, &bb1);

1390:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1391:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1392:       (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
1393:       its--;
1394:     }

1396:     while (its--) {
1397:       VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
1398:       VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

1400:       /* update rhs: bb1 = bb - B*x */
1401:       VecScale(mat->lvec, -1.0);
1402:       (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

1404:       /* local sweep */
1405:       (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx);
1406:     }
1407:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1408:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1409:       (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
1410:       its--;
1411:     }
1412:     while (its--) {
1413:       VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
1414:       VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

1416:       /* update rhs: bb1 = bb - B*x */
1417:       VecScale(mat->lvec, -1.0);
1418:       (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

1420:       /* local sweep */
1421:       (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx);
1422:     }
1423:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1424:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1425:       (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
1426:       its--;
1427:     }
1428:     while (its--) {
1429:       VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
1430:       VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

1432:       /* update rhs: bb1 = bb - B*x */
1433:       VecScale(mat->lvec, -1.0);
1434:       (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

1436:       /* local sweep */
1437:       (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx);
1438:     }
1439:   } else if (flag & SOR_EISENSTAT) {
1440:     Vec xx1;

1442:     VecDuplicate(bb, &xx1);
1443:     (*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx);

1445:     VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
1446:     VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
1447:     if (!mat->diag) {
1448:       MatCreateVecs(matin, &mat->diag, NULL);
1449:       MatGetDiagonal(matin, mat->diag);
1450:     }
1451:     MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop);
1452:     if (hasop) {
1453:       MatMultDiagonalBlock(matin, xx, bb1);
1454:     } else {
1455:       VecPointwiseMult(bb1, mat->diag, xx);
1456:     }
1457:     VecAYPX(bb1, (omega - 2.0) / omega, bb);

1459:     MatMultAdd(mat->B, mat->lvec, bb1, bb1);

1461:     /* local sweep */
1462:     (*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1);
1463:     VecAXPY(xx, 1.0, xx1);
1464:     VecDestroy(&xx1);
1465:   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

1467:   VecDestroy(&bb1);

1469:   matin->factorerrortype = mat->A->factorerrortype;
1470:   return 0;
1471: }
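/*
   A minimal usage sketch for the routine above (illustrative only; assumes `mat` is an assembled
   MATMPIAIJ matrix and `b`, `x` are conforming parallel vectors created elsewhere):

     PetscReal omega = 1.0, fshift = 0.0;
     PetscInt  its = 2, lits = 1;
     // One call performs `its` local symmetric SOR sweeps, refreshing the ghost values of x
     // (via mat->Mvctx) and the local right-hand side bb1 = bb - B*x between sweeps.
     MatSOR(mat, b, omega, (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS), fshift, its, lits, x);
*/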

1473: PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1474: {
1475:   Mat             aA, aB, Aperm;
1476:   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1477:   PetscScalar    *aa, *ba;
1478:   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1479:   PetscSF         rowsf, sf;
1480:   IS              parcolp = NULL;
1481:   PetscBool       done;

1483:   MatGetLocalSize(A, &m, &n);
1484:   ISGetIndices(rowp, &rwant);
1485:   ISGetIndices(colp, &cwant);
1486:   PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest);

1488:   /* Invert row permutation to find out where my rows should go */
1489:   PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf);
1490:   PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant);
1491:   PetscSFSetFromOptions(rowsf);
1492:   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1493:   PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE);
1494:   PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE);

1496:   /* Invert column permutation to find out where my columns should go */
1497:   PetscSFCreate(PetscObjectComm((PetscObject)A), &sf);
1498:   PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant);
1499:   PetscSFSetFromOptions(sf);
1500:   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1501:   PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE);
1502:   PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE);
1503:   PetscSFDestroy(&sf);

1505:   ISRestoreIndices(rowp, &rwant);
1506:   ISRestoreIndices(colp, &cwant);
1507:   MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols);

1509:   /* Find out where my gcols should go */
1510:   MatGetSize(aB, NULL, &ng);
1511:   PetscMalloc1(ng, &gcdest);
1512:   PetscSFCreate(PetscObjectComm((PetscObject)A), &sf);
1513:   PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols);
1514:   PetscSFSetFromOptions(sf);
1515:   PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE);
1516:   PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE);
1517:   PetscSFDestroy(&sf);

1519:   PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz);
1520:   MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done);
1521:   MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done);
1522:   for (i = 0; i < m; i++) {
1523:     PetscInt    row = rdest[i];
1524:     PetscMPIInt rowner;
1525:     PetscLayoutFindOwner(A->rmap, row, &rowner);
1526:     for (j = ai[i]; j < ai[i + 1]; j++) {
1527:       PetscInt    col = cdest[aj[j]];
1528:       PetscMPIInt cowner;
1529:       PetscLayoutFindOwner(A->cmap, col, &cowner); /* Could build an index for the columns to eliminate this search */
1530:       if (rowner == cowner) dnnz[i]++;
1531:       else onnz[i]++;
1532:     }
1533:     for (j = bi[i]; j < bi[i + 1]; j++) {
1534:       PetscInt    col = gcdest[bj[j]];
1535:       PetscMPIInt cowner;
1536:       PetscLayoutFindOwner(A->cmap, col, &cowner);
1537:       if (rowner == cowner) dnnz[i]++;
1538:       else onnz[i]++;
1539:     }
1540:   }
1541:   PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE);
1542:   PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE);
1543:   PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE);
1544:   PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE);
1545:   PetscSFDestroy(&rowsf);

1547:   MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm);
1548:   MatSeqAIJGetArray(aA, &aa);
1549:   MatSeqAIJGetArray(aB, &ba);
1550:   for (i = 0; i < m; i++) {
1551:     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1552:     PetscInt  j0, rowlen;
1553:     rowlen = ai[i + 1] - ai[i];
1554:     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen may exceed the length m of the scratch arrays, so insert in batches */
1555:       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1556:       MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES);
1557:     }
1558:     rowlen = bi[i + 1] - bi[i];
1559:     for (j0 = j = 0; j < rowlen; j0 = j) {
1560:       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1561:       MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES);
1562:     }
1563:   }
1564:   MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY);
1565:   MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY);
1566:   MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done);
1567:   MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done);
1568:   MatSeqAIJRestoreArray(aA, &aa);
1569:   MatSeqAIJRestoreArray(aB, &ba);
1570:   PetscFree4(dnnz, onnz, tdnnz, tonnz);
1571:   PetscFree3(work, rdest, cdest);
1572:   PetscFree(gcdest);
1573:   if (parcolp) ISDestroy(&colp);
1574:   *B = Aperm;
1575:   return 0;
1576: }
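/*
   A minimal usage sketch for the routine above (illustrative only): `rowp` and `colp` are assumed
   to be index sets, defined elsewhere, that describe the desired global row and column permutations
   of an assembled MATMPIAIJ matrix A.

     Mat Aperm;
     MatPermute(A, rowp, colp, &Aperm);   // dispatches to MatPermute_MPIAIJ() for MATMPIAIJ
     // ... use Aperm ...
     MatDestroy(&Aperm);
*/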

1578: PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1579: {
1580:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

1582:   MatGetSize(aij->B, NULL, nghosts);
1583:   if (ghosts) *ghosts = aij->garray;
1584:   return 0;
1585: }
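/*
   A minimal usage sketch (illustrative only): retrieve the global column indices of the ghost
   (off-process) columns referenced by the local part of an assembled MATMPIAIJ matrix A.

     PetscInt        nghosts;
     const PetscInt *ghosts;
     MatGetGhosts(A, &nghosts, &ghosts);   // ghosts points at aij->garray; do not free it
     for (PetscInt g = 0; g < nghosts; g++) PetscPrintf(PETSC_COMM_SELF, "ghost column %" PetscInt_FMT "\n", ghosts[g]);
*/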

1587: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1588: {
1589:   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1590:   Mat            A = mat->A, B = mat->B;
1591:   PetscLogDouble isend[5], irecv[5];

1593:   info->block_size = 1.0;
1594:   MatGetInfo(A, MAT_LOCAL, info);

1596:   isend[0] = info->nz_used;
1597:   isend[1] = info->nz_allocated;
1598:   isend[2] = info->nz_unneeded;
1599:   isend[3] = info->memory;
1600:   isend[4] = info->mallocs;

1602:   MatGetInfo(B, MAT_LOCAL, info);

1604:   isend[0] += info->nz_used;
1605:   isend[1] += info->nz_allocated;
1606:   isend[2] += info->nz_unneeded;
1607:   isend[3] += info->memory;
1608:   isend[4] += info->mallocs;
1609:   if (flag == MAT_LOCAL) {
1610:     info->nz_used      = isend[0];
1611:     info->nz_allocated = isend[1];
1612:     info->nz_unneeded  = isend[2];
1613:     info->memory       = isend[3];
1614:     info->mallocs      = isend[4];
1615:   } else if (flag == MAT_GLOBAL_MAX) {
1616:     MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin));

1618:     info->nz_used      = irecv[0];
1619:     info->nz_allocated = irecv[1];
1620:     info->nz_unneeded  = irecv[2];
1621:     info->memory       = irecv[3];
1622:     info->mallocs      = irecv[4];
1623:   } else if (flag == MAT_GLOBAL_SUM) {
1624:     MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin));

1626:     info->nz_used      = irecv[0];
1627:     info->nz_allocated = irecv[1];
1628:     info->nz_unneeded  = irecv[2];
1629:     info->memory       = irecv[3];
1630:     info->mallocs      = irecv[4];
1631:   }
1632:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1633:   info->fill_ratio_needed = 0;
1634:   info->factor_mallocs    = 0;
1635:   return 0;
1636: }
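/*
   A minimal usage sketch (illustrative only): query the storage statistics summed across all ranks
   for an assembled MATMPIAIJ matrix A.

     MatInfo info;
     MatGetInfo(A, MAT_GLOBAL_SUM, &info);
     PetscPrintf(PetscObjectComm((PetscObject)A), "nz used %g, nz allocated %g, mallocs %g\n",
                 (double)info.nz_used, (double)info.nz_allocated, (double)info.mallocs);
*/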

1638: PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1639: {
1640:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

1642:   switch (op) {
1643:   case MAT_NEW_NONZERO_LOCATIONS:
1644:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1645:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1646:   case MAT_KEEP_NONZERO_PATTERN:
1647:   case MAT_NEW_NONZERO_LOCATION_ERR:
1648:   case MAT_USE_INODES:
1649:   case MAT_IGNORE_ZERO_ENTRIES:
1650:   case MAT_FORM_EXPLICIT_TRANSPOSE:
1651:     MatCheckPreallocated(A, 1);
1652:     MatSetOption(a->A, op, flg);
1653:     MatSetOption(a->B, op, flg);
1654:     break;
1655:   case MAT_ROW_ORIENTED:
1656:     MatCheckPreallocated(A, 1);
1657:     a->roworiented = flg;

1659:     MatSetOption(a->A, op, flg);
1660:     MatSetOption(a->B, op, flg);
1661:     break;
1662:   case MAT_FORCE_DIAGONAL_ENTRIES:
1663:   case MAT_SORTED_FULL:
1664:     PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1665:     break;
1666:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1667:     a->donotstash = flg;
1668:     break;
1669:   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1670:   case MAT_SPD:
1671:   case MAT_SYMMETRIC:
1672:   case MAT_STRUCTURALLY_SYMMETRIC:
1673:   case MAT_HERMITIAN:
1674:   case MAT_SYMMETRY_ETERNAL:
1675:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1676:   case MAT_SPD_ETERNAL:
1677:     /* if the diagonal block is square it inherits some of the properties above */
1678:     break;
1679:   case MAT_SUBMAT_SINGLEIS:
1680:     A->submat_singleis = flg;
1681:     break;
1682:   case MAT_STRUCTURE_ONLY:
1683:     /* The option is handled directly by MatSetOption() */
1684:     break;
1685:   default:
1686:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1687:   }
1688:   return 0;
1689: }

1691: PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1692: {
1693:   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1694:   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1695:   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1696:   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1697:   PetscInt    *cmap, *idx_p;

1700:   mat->getrowactive = PETSC_TRUE;

1702:   if (!mat->rowvalues && (idx || v)) {
1703:     /*
1704:         allocate enough space to hold information from the longest row.
1705:     */
1706:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1707:     PetscInt    max = 1, tmp;
1708:     for (i = 0; i < matin->rmap->n; i++) {
1709:       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1710:       if (max < tmp) max = tmp;
1711:     }
1712:     PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices);
1713:   }

1716:   lrow = row - rstart;

1718:   pvA = &vworkA;
1719:   pcA = &cworkA;
1720:   pvB = &vworkB;
1721:   pcB = &cworkB;
1722:   if (!v) {
1723:     pvA = NULL;
1724:     pvB = NULL;
1725:   }
1726:   if (!idx) {
1727:     pcA = NULL;
1728:     if (!v) pcB = NULL;
1729:   }
1730:   (*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA);
1731:   (*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB);
1732:   nztot = nzA + nzB;

1734:   cmap = mat->garray;
1735:   if (v || idx) {
1736:     if (nztot) {
1737:       /* Sort by increasing column numbers, assuming A and B already sorted */
1738:       PetscInt imark = -1;
1739:       if (v) {
1740:         *v = v_p = mat->rowvalues;
1741:         for (i = 0; i < nzB; i++) {
1742:           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1743:           else break;
1744:         }
1745:         imark = i;
1746:         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1747:         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1748:       }
1749:       if (idx) {
1750:         *idx = idx_p = mat->rowindices;
1751:         if (imark > -1) {
1752:           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1753:         } else {
1754:           for (i = 0; i < nzB; i++) {
1755:             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1756:             else break;
1757:           }
1758:           imark = i;
1759:         }
1760:         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1761:         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1762:       }
1763:     } else {
1764:       if (idx) *idx = NULL;
1765:       if (v) *v = NULL;
1766:     }
1767:   }
1768:   *nz = nztot;
1769:   (*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA);
1770:   (*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB);
1771:   return 0;
1772: }

1774: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1775: {
1776:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

1779:   aij->getrowactive = PETSC_FALSE;
1780:   return 0;
1781: }
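/*
   A minimal usage sketch (illustrative only): iterate over the locally owned rows of A with
   MatGetRow()/MatRestoreRow(), which for MATMPIAIJ merge the diagonal and off-diagonal parts
   into a single row sorted by global column, as implemented above.

     PetscInt rstart, rend;
     MatGetOwnershipRange(A, &rstart, &rend);
     for (PetscInt row = rstart; row < rend; row++) {
       PetscInt           ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       MatGetRow(A, row, &ncols, &cols, &vals);
       // ... inspect cols[0..ncols) and vals[0..ncols) ...
       MatRestoreRow(A, row, &ncols, &cols, &vals);
     }
*/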

1783: PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1784: {
1785:   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1786:   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1787:   PetscInt         i, j, cstart = mat->cmap->rstart;
1788:   PetscReal        sum = 0.0;
1789:   const MatScalar *v, *amata, *bmata;

1791:   if (aij->size == 1) {
1792:     MatNorm(aij->A, type, norm);
1793:   } else {
1794:     MatSeqAIJGetArrayRead(aij->A, &amata);
1795:     MatSeqAIJGetArrayRead(aij->B, &bmata);
1796:     if (type == NORM_FROBENIUS) {
1797:       v = amata;
1798:       for (i = 0; i < amat->nz; i++) {
1799:         sum += PetscRealPart(PetscConj(*v) * (*v));
1800:         v++;
1801:       }
1802:       v = bmata;
1803:       for (i = 0; i < bmat->nz; i++) {
1804:         sum += PetscRealPart(PetscConj(*v) * (*v));
1805:         v++;
1806:       }
1807:       MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
1808:       *norm = PetscSqrtReal(*norm);
1809:       PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz);
1810:     } else if (type == NORM_1) { /* max column norm */
1811:       PetscReal *tmp, *tmp2;
1812:       PetscInt  *jj, *garray = aij->garray;
1813:       PetscCalloc1(mat->cmap->N + 1, &tmp);
1814:       PetscMalloc1(mat->cmap->N + 1, &tmp2);
1815:       *norm = 0.0;
1816:       v     = amata;
1817:       jj    = amat->j;
1818:       for (j = 0; j < amat->nz; j++) {
1819:         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1820:         v++;
1821:       }
1822:       v  = bmata;
1823:       jj = bmat->j;
1824:       for (j = 0; j < bmat->nz; j++) {
1825:         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1826:         v++;
1827:       }
1828:       MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
1829:       for (j = 0; j < mat->cmap->N; j++) {
1830:         if (tmp2[j] > *norm) *norm = tmp2[j];
1831:       }
1832:       PetscFree(tmp);
1833:       PetscFree(tmp2);
1834:       PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0));
1835:     } else if (type == NORM_INFINITY) { /* max row norm */
1836:       PetscReal ntemp = 0.0;
1837:       for (j = 0; j < aij->A->rmap->n; j++) {
1838:         v   = amata + amat->i[j];
1839:         sum = 0.0;
1840:         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1841:           sum += PetscAbsScalar(*v);
1842:           v++;
1843:         }
1844:         v = bmata + bmat->i[j];
1845:         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1846:           sum += PetscAbsScalar(*v);
1847:           v++;
1848:         }
1849:         if (sum > ntemp) ntemp = sum;
1850:       }
1851:       MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat));
1852:       PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0));
1853:     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1854:     MatSeqAIJRestoreArrayRead(aij->A, &amata);
1855:     MatSeqAIJRestoreArrayRead(aij->B, &bmata);
1856:   }
1857:   return 0;
1858: }
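/*
   A minimal usage sketch (illustrative only): the norms supported for MATMPIAIJ are NORM_1,
   NORM_FROBENIUS, and NORM_INFINITY (the 2-norm is rejected above).

     PetscReal n1, nf, ninf;
     MatNorm(A, NORM_1, &n1);
     MatNorm(A, NORM_FROBENIUS, &nf);
     MatNorm(A, NORM_INFINITY, &ninf);
*/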

1860: PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1861: {
1862:   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1863:   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1864:   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1865:   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1866:   Mat              B, A_diag, *B_diag;
1867:   const MatScalar *pbv, *bv;

1869:   if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
1870:   ma = A->rmap->n;
1871:   na = A->cmap->n;
1872:   mb = a->B->rmap->n;
1873:   nb = a->B->cmap->n;
1874:   ai = Aloc->i;
1875:   aj = Aloc->j;
1876:   bi = Bloc->i;
1877:   bj = Bloc->j;
1878:   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1879:     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1880:     PetscSFNode         *oloc;
1881:     PETSC_UNUSED PetscSF sf;

1883:     PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc);
1884:     /* compute d_nnz for preallocation */
1885:     PetscArrayzero(d_nnz, na);
1886:     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1887:     /* compute local off-diagonal contributions */
1888:     PetscArrayzero(g_nnz, nb);
1889:     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1890:     /* map those to global */
1891:     PetscSFCreate(PetscObjectComm((PetscObject)A), &sf);
1892:     PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray);
1893:     PetscSFSetFromOptions(sf);
1894:     PetscArrayzero(o_nnz, na);
1895:     PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM);
1896:     PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM);
1897:     PetscSFDestroy(&sf);

1899:     MatCreate(PetscObjectComm((PetscObject)A), &B);
1900:     MatSetSizes(B, A->cmap->n, A->rmap->n, N, M);
1901:     MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs));
1902:     MatSetType(B, ((PetscObject)A)->type_name);
1903:     MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
1904:     PetscFree4(d_nnz, o_nnz, g_nnz, oloc);
1905:   } else {
1906:     B = *matout;
1907:     MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
1908:   }

1910:   b           = (Mat_MPIAIJ *)B->data;
1911:   A_diag      = a->A;
1912:   B_diag      = &b->A;
1913:   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1914:   A_diag_ncol = A_diag->cmap->N;
1915:   B_diag_ilen = sub_B_diag->ilen;
1916:   B_diag_i    = sub_B_diag->i;

1918:   /* Set ilen for diagonal of B */
1919:   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

1921:   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1922:      very quickly (i.e., without using MatSetValues()), because all writes are local. */
1923:   MatTransposeSetPrecursor(A_diag, *B_diag);
1924:   MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag);

1926:   /* copy over the B part */
1927:   PetscMalloc1(bi[mb], &cols);
1928:   MatSeqAIJGetArrayRead(a->B, &bv);
1929:   pbv = bv;
1930:   row = A->rmap->rstart;
1931:   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1932:   cols_tmp = cols;
1933:   for (i = 0; i < mb; i++) {
1934:     ncol = bi[i + 1] - bi[i];
1935:     MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES);
1936:     row++;
1937:     pbv += ncol;
1938:     cols_tmp += ncol;
1939:   }
1940:   PetscFree(cols);
1941:   MatSeqAIJRestoreArrayRead(a->B, &bv);

1943:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
1944:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
1945:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1946:     *matout = B;
1947:   } else {
1948:     MatHeaderMerge(A, &B);
1949:   }
1950:   return 0;
1951: }
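/*
   A minimal usage sketch (illustrative only): create the transpose once, then reuse its nonzero
   structure when the values of A change but its pattern does not.

     Mat At;
     MatTranspose(A, MAT_INITIAL_MATRIX, &At);   // allocates At with the transposed pattern
     // ... modify the numerical values of A, keeping the same nonzero pattern ...
     MatTranspose(A, MAT_REUSE_MATRIX, &At);     // refills At in place
     MatDestroy(&At);
*/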

1953: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1954: {
1955:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1956:   Mat         a = aij->A, b = aij->B;
1957:   PetscInt    s1, s2, s3;

1959:   MatGetLocalSize(mat, &s2, &s3);
1960:   if (rr) {
1961:     VecGetLocalSize(rr, &s1);
1963:     /* Overlap communication with computation. */
1964:     VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD);
1965:   }
1966:   if (ll) {
1967:     VecGetLocalSize(ll, &s1);
1969:     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1970:   }
1971:   /* scale the diagonal block */
1972:   PetscUseTypeMethod(a, diagonalscale, ll, rr);

1974:   if (rr) {
1975:     /* Do a scatter end and then right scale the off-diagonal block */
1976:     VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD);
1977:     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1978:   }
1979:   return 0;
1980: }

1982: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1983: {
1984:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

1986:   MatSetUnfactored(a->A);
1987:   return 0;
1988: }

1990: PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
1991: {
1992:   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
1993:   Mat         a, b, c, d;
1994:   PetscBool   flg;

1996:   a = matA->A;
1997:   b = matA->B;
1998:   c = matB->A;
1999:   d = matB->B;

2001:   MatEqual(a, c, &flg);
2002:   if (flg) MatEqual(b, d, &flg);
2003:   MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
2004:   return 0;
2005: }

2007: PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2008: {
2009:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

2012:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2013:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2014:     /* Because of the column compression in the off-process part of the matrix a->B,
2015:        the number of columns in a->B and b->B may differ, hence we cannot call
2016:        MatCopy() directly on the two parts. If need be, a copy more efficient than
2017:        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2018:        then copying the submatrices */
2019:     MatCopy_Basic(A, B, str);
2020:   } else {
2021:     MatCopy(a->A, b->A, str);
2022:     MatCopy(a->B, b->B, str);
2023:   }
2024:   PetscObjectStateIncrease((PetscObject)B);
2025:   return 0;
2026: }

2028: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2029: {
2030:   MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL);
2031:   return 0;
2032: }

2034: /*
2035:    Computes the number of nonzeros per row needed for preallocation when X and Y
2036:    have different nonzero structure.
2037: */
2038: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2039: {
2040:   PetscInt i, j, k, nzx, nzy;

2042:   /* Set the number of nonzeros in the new matrix */
2043:   for (i = 0; i < m; i++) {
2044:     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2045:     nzx    = xi[i + 1] - xi[i];
2046:     nzy    = yi[i + 1] - yi[i];
2047:     nnz[i] = 0;
2048:     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2049:       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2050:       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2051:       nnz[i]++;
2052:     }
2053:     for (; k < nzy; k++) nnz[i]++;
2054:   }
2055:   return 0;
2056: }

2058: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2059: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2060: {
2061:   PetscInt    m = Y->rmap->N;
2062:   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2063:   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

2065:   MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz);
2066:   return 0;
2067: }

2069: PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2070: {
2071:   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

2073:   if (str == SAME_NONZERO_PATTERN) {
2074:     MatAXPY(yy->A, a, xx->A, str);
2075:     MatAXPY(yy->B, a, xx->B, str);
2076:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2077:     MatAXPY_Basic(Y, a, X, str);
2078:   } else {
2079:     Mat       B;
2080:     PetscInt *nnz_d, *nnz_o;

2082:     PetscMalloc1(yy->A->rmap->N, &nnz_d);
2083:     PetscMalloc1(yy->B->rmap->N, &nnz_o);
2084:     MatCreate(PetscObjectComm((PetscObject)Y), &B);
2085:     PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name);
2086:     MatSetLayouts(B, Y->rmap, Y->cmap);
2087:     MatSetType(B, ((PetscObject)Y)->type_name);
2088:     MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d);
2089:     MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o);
2090:     MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o);
2091:     MatAXPY_BasicWithPreallocation(B, Y, a, X, str);
2092:     MatHeaderMerge(Y, &B);
2093:     PetscFree(nnz_d);
2094:     PetscFree(nnz_o);
2095:   }
2096:   return 0;
2097: }
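/*
   A minimal usage sketch (illustrative only): Y += a*X for two MATMPIAIJ matrices with the same
   layouts. SAME_NONZERO_PATTERN takes the fast per-block path above, while
   DIFFERENT_NONZERO_PATTERN triggers the preallocate-and-merge path.

     PetscScalar a = 2.0;
     MatAXPY(Y, a, X, DIFFERENT_NONZERO_PATTERN);
*/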

2099: PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

2101: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2102: {
2103:   if (PetscDefined(USE_COMPLEX)) {
2104:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

2106:     MatConjugate_SeqAIJ(aij->A);
2107:     MatConjugate_SeqAIJ(aij->B);
2108:   }
2109:   return 0;
2110: }

2112: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2113: {
2114:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2116:   MatRealPart(a->A);
2117:   MatRealPart(a->B);
2118:   return 0;
2119: }

2121: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2122: {
2123:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2125:   MatImaginaryPart(a->A);
2126:   MatImaginaryPart(a->B);
2127:   return 0;
2128: }

2130: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2131: {
2132:   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2133:   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2134:   PetscScalar       *va, *vv;
2135:   Vec                vB, vA;
2136:   const PetscScalar *vb;

2138:   VecCreateSeq(PETSC_COMM_SELF, m, &vA);
2139:   MatGetRowMaxAbs(a->A, vA, idx);

2141:   VecGetArrayWrite(vA, &va);
2142:   if (idx) {
2143:     for (i = 0; i < m; i++) {
2144:       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2145:     }
2146:   }

2148:   VecCreateSeq(PETSC_COMM_SELF, m, &vB);
2149:   PetscMalloc1(m, &idxb);
2150:   MatGetRowMaxAbs(a->B, vB, idxb);

2152:   VecGetArrayWrite(v, &vv);
2153:   VecGetArrayRead(vB, &vb);
2154:   for (i = 0; i < m; i++) {
2155:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2156:       vv[i] = vb[i];
2157:       if (idx) idx[i] = a->garray[idxb[i]];
2158:     } else {
2159:       vv[i] = va[i];
2160:       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2161:     }
2162:   }
2163:   VecRestoreArrayWrite(v, &vv);
2164:   VecRestoreArrayWrite(vA, &va);
2165:   VecRestoreArrayRead(vB, &vb);
2166:   PetscFree(idxb);
2167:   VecDestroy(&vA);
2168:   VecDestroy(&vB);
2169:   return 0;
2170: }
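/*
   A minimal usage sketch (illustrative only): v receives, for each locally owned row, the entry of
   largest absolute value, and idx (optional, may be NULL) its global column index.

     Vec      v;
     PetscInt m, *idx;
     MatCreateVecs(A, NULL, &v);        // left vector: one entry per locally owned row
     MatGetLocalSize(A, &m, NULL);
     PetscMalloc1(m, &idx);
     MatGetRowMaxAbs(A, v, idx);
     PetscFree(idx);
     VecDestroy(&v);
*/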

2172: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2173: {
2174:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2175:   PetscInt           m = A->rmap->n, n = A->cmap->n;
2176:   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2177:   PetscInt          *cmap = mat->garray;
2178:   PetscInt          *diagIdx, *offdiagIdx;
2179:   Vec                diagV, offdiagV;
2180:   PetscScalar       *a, *diagA, *offdiagA;
2181:   const PetscScalar *ba, *bav;
2182:   PetscInt           r, j, col, ncols, *bi, *bj;
2183:   Mat                B = mat->B;
2184:   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

2186:   /* When one process holds the entire matrix and the other processes have no entries */
2187:   if (A->cmap->N == n) {
2188:     VecGetArrayWrite(v, &diagA);
2189:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV);
2190:     MatGetRowMinAbs(mat->A, diagV, idx);
2191:     VecDestroy(&diagV);
2192:     VecRestoreArrayWrite(v, &diagA);
2193:     return 0;
2194:   } else if (n == 0) {
2195:     if (m) {
2196:       VecGetArrayWrite(v, &a);
2197:       for (r = 0; r < m; r++) {
2198:         a[r] = 0.0;
2199:         if (idx) idx[r] = -1;
2200:       }
2201:       VecRestoreArrayWrite(v, &a);
2202:     }
2203:     return 0;
2204:   }

2206:   PetscMalloc2(m, &diagIdx, m, &offdiagIdx);
2207:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2208:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2209:   MatGetRowMinAbs(mat->A, diagV, diagIdx);

2211:   /* Get offdiagIdx[] for implicit 0.0 */
2212:   MatSeqAIJGetArrayRead(B, &bav);
2213:   ba = bav;
2214:   bi = b->i;
2215:   bj = b->j;
2216:   VecGetArrayWrite(offdiagV, &offdiagA);
2217:   for (r = 0; r < m; r++) {
2218:     ncols = bi[r + 1] - bi[r];
2219:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2220:       offdiagA[r]   = *ba;
2221:       offdiagIdx[r] = cmap[0];
2222:     } else { /* Brow is sparse, so the minimum absolute value is 0.0 (an implicit zero exists) */
2223:       offdiagA[r] = 0.0;

2225:       /* Find first hole in the cmap */
2226:       for (j = 0; j < ncols; j++) {
2227:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2228:         if (col > j && j < cstart) {
2229:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2230:           break;
2231:         } else if (col > j + n && j >= cstart) {
2232:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2233:           break;
2234:         }
2235:       }
2236:       if (j == ncols && ncols < A->cmap->N - n) {
2237:         /* a hole is outside compressed Bcols */
2238:         if (ncols == 0) {
2239:           if (cstart) {
2240:             offdiagIdx[r] = 0;
2241:           } else offdiagIdx[r] = cend;
2242:         } else { /* ncols > 0 */
2243:           offdiagIdx[r] = cmap[ncols - 1] + 1;
2244:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2245:         }
2246:       }
2247:     }

2249:     for (j = 0; j < ncols; j++) {
2250:       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2251:         offdiagA[r]   = *ba;
2252:         offdiagIdx[r] = cmap[*bj];
2253:       }
2254:       ba++;
2255:       bj++;
2256:     }
2257:   }

2259:   VecGetArrayWrite(v, &a);
2260:   VecGetArrayRead(diagV, (const PetscScalar **)&diagA);
2261:   for (r = 0; r < m; ++r) {
2262:     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2263:       a[r] = diagA[r];
2264:       if (idx) idx[r] = cstart + diagIdx[r];
2265:     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2266:       a[r] = diagA[r];
2267:       if (idx) {
2268:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2269:           idx[r] = cstart + diagIdx[r];
2270:         } else idx[r] = offdiagIdx[r];
2271:       }
2272:     } else {
2273:       a[r] = offdiagA[r];
2274:       if (idx) idx[r] = offdiagIdx[r];
2275:     }
2276:   }
2277:   MatSeqAIJRestoreArrayRead(B, &bav);
2278:   VecRestoreArrayWrite(v, &a);
2279:   VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA);
2280:   VecRestoreArrayWrite(offdiagV, &offdiagA);
2281:   VecDestroy(&diagV);
2282:   VecDestroy(&offdiagV);
2283:   PetscFree2(diagIdx, offdiagIdx);
2284:   return 0;
2285: }

2287: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2288: {
2289:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2290:   PetscInt           m = A->rmap->n, n = A->cmap->n;
2291:   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2292:   PetscInt          *cmap = mat->garray;
2293:   PetscInt          *diagIdx, *offdiagIdx;
2294:   Vec                diagV, offdiagV;
2295:   PetscScalar       *a, *diagA, *offdiagA;
2296:   const PetscScalar *ba, *bav;
2297:   PetscInt           r, j, col, ncols, *bi, *bj;
2298:   Mat                B = mat->B;
2299:   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

2301:   /* When one process holds the entire matrix and the other processes have no entries */
2302:   if (A->cmap->N == n) {
2303:     VecGetArrayWrite(v, &diagA);
2304:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV);
2305:     MatGetRowMin(mat->A, diagV, idx);
2306:     VecDestroy(&diagV);
2307:     VecRestoreArrayWrite(v, &diagA);
2308:     return 0;
2309:   } else if (n == 0) {
2310:     if (m) {
2311:       VecGetArrayWrite(v, &a);
2312:       for (r = 0; r < m; r++) {
2313:         a[r] = PETSC_MAX_REAL;
2314:         if (idx) idx[r] = -1;
2315:       }
2316:       VecRestoreArrayWrite(v, &a);
2317:     }
2318:     return 0;
2319:   }

2321:   PetscCalloc2(m, &diagIdx, m, &offdiagIdx);
2322:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2323:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2324:   MatGetRowMin(mat->A, diagV, diagIdx);

2326:   /* Get offdiagIdx[] for implicit 0.0 */
2327:   MatSeqAIJGetArrayRead(B, &bav);
2328:   ba = bav;
2329:   bi = b->i;
2330:   bj = b->j;
2331:   VecGetArrayWrite(offdiagV, &offdiagA);
2332:   for (r = 0; r < m; r++) {
2333:     ncols = bi[r + 1] - bi[r];
2334:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2335:       offdiagA[r]   = *ba;
2336:       offdiagIdx[r] = cmap[0];
2337:     } else { /* Brow is sparse, so the minimum is 0.0 or lower (an implicit zero exists) */
2338:       offdiagA[r] = 0.0;

2340:       /* Find first hole in the cmap */
2341:       for (j = 0; j < ncols; j++) {
2342:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2343:         if (col > j && j < cstart) {
2344:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2345:           break;
2346:         } else if (col > j + n && j >= cstart) {
2347:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2348:           break;
2349:         }
2350:       }
2351:       if (j == ncols && ncols < A->cmap->N - n) {
2352:         /* a hole is outside compressed Bcols */
2353:         if (ncols == 0) {
2354:           if (cstart) {
2355:             offdiagIdx[r] = 0;
2356:           } else offdiagIdx[r] = cend;
2357:         } else { /* ncols > 0 */
2358:           offdiagIdx[r] = cmap[ncols - 1] + 1;
2359:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2360:         }
2361:       }
2362:     }

2364:     for (j = 0; j < ncols; j++) {
2365:       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2366:         offdiagA[r]   = *ba;
2367:         offdiagIdx[r] = cmap[*bj];
2368:       }
2369:       ba++;
2370:       bj++;
2371:     }
2372:   }

2374:   VecGetArrayWrite(v, &a);
2375:   VecGetArrayRead(diagV, (const PetscScalar **)&diagA);
2376:   for (r = 0; r < m; ++r) {
2377:     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2378:       a[r] = diagA[r];
2379:       if (idx) idx[r] = cstart + diagIdx[r];
2380:     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2381:       a[r] = diagA[r];
2382:       if (idx) {
2383:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2384:           idx[r] = cstart + diagIdx[r];
2385:         } else idx[r] = offdiagIdx[r];
2386:       }
2387:     } else {
2388:       a[r] = offdiagA[r];
2389:       if (idx) idx[r] = offdiagIdx[r];
2390:     }
2391:   }
2392:   MatSeqAIJRestoreArrayRead(B, &bav);
2393:   VecRestoreArrayWrite(v, &a);
2394:   VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA);
2395:   VecRestoreArrayWrite(offdiagV, &offdiagA);
2396:   VecDestroy(&diagV);
2397:   VecDestroy(&offdiagV);
2398:   PetscFree2(diagIdx, offdiagIdx);
2399:   return 0;
2400: }

2402: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2403: {
2404:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2405:   PetscInt           m = A->rmap->n, n = A->cmap->n;
2406:   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2407:   PetscInt          *cmap = mat->garray;
2408:   PetscInt          *diagIdx, *offdiagIdx;
2409:   Vec                diagV, offdiagV;
2410:   PetscScalar       *a, *diagA, *offdiagA;
2411:   const PetscScalar *ba, *bav;
2412:   PetscInt           r, j, col, ncols, *bi, *bj;
2413:   Mat                B = mat->B;
2414:   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

2416:   /* When one process holds the entire matrix and the other processes have no entries */
2417:   if (A->cmap->N == n) {
2418:     VecGetArrayWrite(v, &diagA);
2419:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV);
2420:     MatGetRowMax(mat->A, diagV, idx);
2421:     VecDestroy(&diagV);
2422:     VecRestoreArrayWrite(v, &diagA);
2423:     return 0;
2424:   } else if (n == 0) {
2425:     if (m) {
2426:       VecGetArrayWrite(v, &a);
2427:       for (r = 0; r < m; r++) {
2428:         a[r] = PETSC_MIN_REAL;
2429:         if (idx) idx[r] = -1;
2430:       }
2431:       VecRestoreArrayWrite(v, &a);
2432:     }
2433:     return 0;
2434:   }

2436:   PetscMalloc2(m, &diagIdx, m, &offdiagIdx);
2437:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2438:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2439:   MatGetRowMax(mat->A, diagV, diagIdx);

2441:   /* Get offdiagIdx[] for implicit 0.0 */
2442:   MatSeqAIJGetArrayRead(B, &bav);
2443:   ba = bav;
2444:   bi = b->i;
2445:   bj = b->j;
2446:   VecGetArrayWrite(offdiagV, &offdiagA);
2447:   for (r = 0; r < m; r++) {
2448:     ncols = bi[r + 1] - bi[r];
2449:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2450:       offdiagA[r]   = *ba;
2451:       offdiagIdx[r] = cmap[0];
2452:     } else { /* Brow is sparse, so the maximum is 0.0 or higher (an implicit zero exists) */
2453:       offdiagA[r] = 0.0;

2455:       /* Find first hole in the cmap */
2456:       for (j = 0; j < ncols; j++) {
2457:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2458:         if (col > j && j < cstart) {
2459:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2460:           break;
2461:         } else if (col > j + n && j >= cstart) {
2462:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2463:           break;
2464:         }
2465:       }
2466:       if (j == ncols && ncols < A->cmap->N - n) {
2467:         /* a hole is outside compressed Bcols */
2468:         if (ncols == 0) {
2469:           if (cstart) {
2470:             offdiagIdx[r] = 0;
2471:           } else offdiagIdx[r] = cend;
2472:         } else { /* ncols > 0 */
2473:           offdiagIdx[r] = cmap[ncols - 1] + 1;
2474:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2475:         }
2476:       }
2477:     }

2479:     for (j = 0; j < ncols; j++) {
2480:       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2481:         offdiagA[r]   = *ba;
2482:         offdiagIdx[r] = cmap[*bj];
2483:       }
2484:       ba++;
2485:       bj++;
2486:     }
2487:   }

2489:   VecGetArrayWrite(v, &a);
2490:   VecGetArrayRead(diagV, (const PetscScalar **)&diagA);
2491:   for (r = 0; r < m; ++r) {
2492:     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2493:       a[r] = diagA[r];
2494:       if (idx) idx[r] = cstart + diagIdx[r];
2495:     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2496:       a[r] = diagA[r];
2497:       if (idx) {
2498:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2499:           idx[r] = cstart + diagIdx[r];
2500:         } else idx[r] = offdiagIdx[r];
2501:       }
2502:     } else {
2503:       a[r] = offdiagA[r];
2504:       if (idx) idx[r] = offdiagIdx[r];
2505:     }
2506:   }
2507:   MatSeqAIJRestoreArrayRead(B, &bav);
2508:   VecRestoreArrayWrite(v, &a);
2509:   VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA);
2510:   VecRestoreArrayWrite(offdiagV, &offdiagA);
2511:   VecDestroy(&diagV);
2512:   VecDestroy(&offdiagV);
2513:   PetscFree2(diagIdx, offdiagIdx);
2514:   return 0;
2515: }

2517: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2518: {
2519:   Mat *dummy;

2521:   MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy);
2522:   *newmat = *dummy;
2523:   PetscFree(dummy);
2524:   return 0;
2525: }

2527: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2528: {
2529:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2531:   MatInvertBlockDiagonal(a->A, values);
2532:   A->factorerrortype = a->A->factorerrortype;
2533:   return 0;
2534: }

2536: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2537: {
2538:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

2541:   MatSetRandom(aij->A, rctx);
2542:   if (x->assembled) {
2543:     MatSetRandom(aij->B, rctx);
2544:   } else {
2545:     MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx);
2546:   }
2547:   MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY);
2548:   MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY);
2549:   return 0;
2550: }

2552: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2553: {
2554:   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2555:   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2556:   return 0;
2557: }

2559: /*@
2560:    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

2562:    Not Collective

2564:    Input Parameter:
2565: .    A - the matrix

2567:    Output Parameter:
2568: .    nz - the number of nonzeros stored on this MPI rank (diagonal plus off-diagonal block)

2570:  Level: advanced

2572: .seealso: `MATMPIAIJ`, `Mat`
2573: @*/
2574: PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2575: {
2576:   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2577:   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

2579:   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2580:   return 0;
2581: }
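/*
   A minimal usage sketch (illustrative only): report the number of nonzeros stored on this rank
   of an assembled MATMPIAIJ matrix A.

     PetscCount nz;
     MatMPIAIJGetNumberNonzeros(A, &nz);
     PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz);
*/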

2583: /*@
2584:    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`

2586:    Collective

2588:    Input Parameters:
2589: +    A - the matrix
2590: -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

2592:  Level: advanced

2594: @*/
2595: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2596: {
2597:   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2598:   return 0;
2599: }
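/*
   A minimal usage sketch (illustrative only): the scalable overlap algorithm can be selected either
   programmatically,

     MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE);

   or at run time through the options database (processed by MatSetFromOptions_MPIAIJ() below),
   e.g. by passing -mat_increase_overlap_scalable to the application.
*/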

2601: PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2602: {
2603:   PetscBool sc = PETSC_FALSE, flg;

2605:   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2606:   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2607:   PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg);
2608:   if (flg) MatMPIAIJSetUseScalableIncreaseOverlap(A, sc);
2609:   PetscOptionsHeadEnd();
2610:   return 0;
2611: }

2613: PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2614: {
2615:   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2616:   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

2618:   if (!Y->preallocated) {
2619:     MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL);
2620:   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2621:     PetscInt nonew = aij->nonew;
2622:     MatSeqAIJSetPreallocation(maij->A, 1, NULL);
2623:     aij->nonew = nonew;
2624:   }
2625:   MatShift_Basic(Y, a);
2626:   return 0;
2627: }

2629: PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2630: {
2631:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2634:   MatMissingDiagonal(a->A, missing, d);
2635:   if (d) {
2636:     PetscInt rstart;
2637:     MatGetOwnershipRange(A, &rstart, NULL);
2638:     *d += rstart;
2639:   }
2640:   return 0;
2641: }

2643: PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2644: {
2645:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2647:   MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag);
2648:   return 0;
2649: }

2651: /* -------------------------------------------------------------------*/
2652: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2653:                                        MatGetRow_MPIAIJ,
2654:                                        MatRestoreRow_MPIAIJ,
2655:                                        MatMult_MPIAIJ,
2656:                                        /* 4*/ MatMultAdd_MPIAIJ,
2657:                                        MatMultTranspose_MPIAIJ,
2658:                                        MatMultTransposeAdd_MPIAIJ,
2659:                                        NULL,
2660:                                        NULL,
2661:                                        NULL,
2662:                                        /*10*/ NULL,
2663:                                        NULL,
2664:                                        NULL,
2665:                                        MatSOR_MPIAIJ,
2666:                                        MatTranspose_MPIAIJ,
2667:                                        /*15*/ MatGetInfo_MPIAIJ,
2668:                                        MatEqual_MPIAIJ,
2669:                                        MatGetDiagonal_MPIAIJ,
2670:                                        MatDiagonalScale_MPIAIJ,
2671:                                        MatNorm_MPIAIJ,
2672:                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2673:                                        MatAssemblyEnd_MPIAIJ,
2674:                                        MatSetOption_MPIAIJ,
2675:                                        MatZeroEntries_MPIAIJ,
2676:                                        /*24*/ MatZeroRows_MPIAIJ,
2677:                                        NULL,
2678:                                        NULL,
2679:                                        NULL,
2680:                                        NULL,
2681:                                        /*29*/ MatSetUp_MPIAIJ,
2682:                                        NULL,
2683:                                        NULL,
2684:                                        MatGetDiagonalBlock_MPIAIJ,
2685:                                        NULL,
2686:                                        /*34*/ MatDuplicate_MPIAIJ,
2687:                                        NULL,
2688:                                        NULL,
2689:                                        NULL,
2690:                                        NULL,
2691:                                        /*39*/ MatAXPY_MPIAIJ,
2692:                                        MatCreateSubMatrices_MPIAIJ,
2693:                                        MatIncreaseOverlap_MPIAIJ,
2694:                                        MatGetValues_MPIAIJ,
2695:                                        MatCopy_MPIAIJ,
2696:                                        /*44*/ MatGetRowMax_MPIAIJ,
2697:                                        MatScale_MPIAIJ,
2698:                                        MatShift_MPIAIJ,
2699:                                        MatDiagonalSet_MPIAIJ,
2700:                                        MatZeroRowsColumns_MPIAIJ,
2701:                                        /*49*/ MatSetRandom_MPIAIJ,
2702:                                        MatGetRowIJ_MPIAIJ,
2703:                                        MatRestoreRowIJ_MPIAIJ,
2704:                                        NULL,
2705:                                        NULL,
2706:                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2707:                                        NULL,
2708:                                        MatSetUnfactored_MPIAIJ,
2709:                                        MatPermute_MPIAIJ,
2710:                                        NULL,
2711:                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2712:                                        MatDestroy_MPIAIJ,
2713:                                        MatView_MPIAIJ,
2714:                                        NULL,
2715:                                        NULL,
2716:                                        /*64*/ NULL,
2717:                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2718:                                        NULL,
2719:                                        NULL,
2720:                                        NULL,
2721:                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2722:                                        MatGetRowMinAbs_MPIAIJ,
2723:                                        NULL,
2724:                                        NULL,
2725:                                        NULL,
2726:                                        NULL,
2727:                                        /*75*/ MatFDColoringApply_AIJ,
2728:                                        MatSetFromOptions_MPIAIJ,
2729:                                        NULL,
2730:                                        NULL,
2731:                                        MatFindZeroDiagonals_MPIAIJ,
2732:                                        /*80*/ NULL,
2733:                                        NULL,
2734:                                        NULL,
2735:                                        /*83*/ MatLoad_MPIAIJ,
2736:                                        MatIsSymmetric_MPIAIJ,
2737:                                        NULL,
2738:                                        NULL,
2739:                                        NULL,
2740:                                        NULL,
2741:                                        /*89*/ NULL,
2742:                                        NULL,
2743:                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2744:                                        NULL,
2745:                                        NULL,
2746:                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2747:                                        NULL,
2748:                                        NULL,
2749:                                        NULL,
2750:                                        MatBindToCPU_MPIAIJ,
2751:                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2752:                                        NULL,
2753:                                        NULL,
2754:                                        MatConjugate_MPIAIJ,
2755:                                        NULL,
2756:                                        /*104*/ MatSetValuesRow_MPIAIJ,
2757:                                        MatRealPart_MPIAIJ,
2758:                                        MatImaginaryPart_MPIAIJ,
2759:                                        NULL,
2760:                                        NULL,
2761:                                        /*109*/ NULL,
2762:                                        NULL,
2763:                                        MatGetRowMin_MPIAIJ,
2764:                                        NULL,
2765:                                        MatMissingDiagonal_MPIAIJ,
2766:                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2767:                                        NULL,
2768:                                        MatGetGhosts_MPIAIJ,
2769:                                        NULL,
2770:                                        NULL,
2771:                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2772:                                        NULL,
2773:                                        NULL,
2774:                                        NULL,
2775:                                        MatGetMultiProcBlock_MPIAIJ,
2776:                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2777:                                        MatGetColumnReductions_MPIAIJ,
2778:                                        MatInvertBlockDiagonal_MPIAIJ,
2779:                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2780:                                        MatCreateSubMatricesMPI_MPIAIJ,
2781:                                        /*129*/ NULL,
2782:                                        NULL,
2783:                                        NULL,
2784:                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2785:                                        NULL,
2786:                                        /*134*/ NULL,
2787:                                        NULL,
2788:                                        NULL,
2789:                                        NULL,
2790:                                        NULL,
2791:                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2792:                                        NULL,
2793:                                        NULL,
2794:                                        MatFDColoringSetUp_MPIXAIJ,
2795:                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2796:                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2797:                                        /*145*/ NULL,
2798:                                        NULL,
2799:                                        NULL,
2800:                                        MatCreateGraph_Simple_AIJ,
2801:                                        NULL,
2802:                                        /*150*/ NULL};

2804: /* ----------------------------------------------------------------------------------------*/

2806: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2807: {
2808:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

2810:   MatStoreValues(aij->A);
2811:   MatStoreValues(aij->B);
2812:   return 0;
2813: }

2815: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2816: {
2817:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

2819:   MatRetrieveValues(aij->A);
2820:   MatRetrieveValues(aij->B);
2821:   return 0;
2822: }

2824: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2825: {
2826:   Mat_MPIAIJ *b;
2827:   PetscMPIInt size;

2829:   PetscLayoutSetUp(B->rmap);
2830:   PetscLayoutSetUp(B->cmap);
2831:   b = (Mat_MPIAIJ *)B->data;

2833: #if defined(PETSC_USE_CTABLE)
2834:   PetscTableDestroy(&b->colmap);
2835: #else
2836:   PetscFree(b->colmap);
2837: #endif
2838:   PetscFree(b->garray);
2839:   VecDestroy(&b->lvec);
2840:   VecScatterDestroy(&b->Mvctx);

2842:   /* Because B will have been resized we simply destroy it and create a new one each time */
2843:   MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);
2844:   MatDestroy(&b->B);
2845:   MatCreate(PETSC_COMM_SELF, &b->B);
2846:   MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0);
2847:   MatSetBlockSizesFromMats(b->B, B, B);
2848:   MatSetType(b->B, MATSEQAIJ);

2850:   if (!B->preallocated) {
2851:     MatCreate(PETSC_COMM_SELF, &b->A);
2852:     MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n);
2853:     MatSetBlockSizesFromMats(b->A, B, B);
2854:     MatSetType(b->A, MATSEQAIJ);
2855:   }

2857:   MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz);
2858:   MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz);
2859:   B->preallocated  = PETSC_TRUE;
2860:   B->was_assembled = PETSC_FALSE;
2861:   B->assembled     = PETSC_FALSE;
2862:   return 0;
2863: }

2865: PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2866: {
2867:   Mat_MPIAIJ *b;

2870:   PetscLayoutSetUp(B->rmap);
2871:   PetscLayoutSetUp(B->cmap);
2872:   b = (Mat_MPIAIJ *)B->data;

2874: #if defined(PETSC_USE_CTABLE)
2875:   PetscTableDestroy(&b->colmap);
2876: #else
2877:   PetscFree(b->colmap);
2878: #endif
2879:   PetscFree(b->garray);
2880:   VecDestroy(&b->lvec);
2881:   VecScatterDestroy(&b->Mvctx);

2883:   MatResetPreallocation(b->A);
2884:   MatResetPreallocation(b->B);
2885:   B->preallocated  = PETSC_TRUE;
2886:   B->was_assembled = PETSC_FALSE;
2887:   B->assembled     = PETSC_FALSE;
2888:   return 0;
2889: }

2891: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2892: {
2893:   Mat         mat;
2894:   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

2896:   *newmat = NULL;
2897:   MatCreate(PetscObjectComm((PetscObject)matin), &mat);
2898:   MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N);
2899:   MatSetBlockSizesFromMats(mat, matin, matin);
2900:   MatSetType(mat, ((PetscObject)matin)->type_name);
2901:   a = (Mat_MPIAIJ *)mat->data;

2903:   mat->factortype   = matin->factortype;
2904:   mat->assembled    = matin->assembled;
2905:   mat->insertmode   = NOT_SET_VALUES;
2906:   mat->preallocated = matin->preallocated;

2908:   a->size         = oldmat->size;
2909:   a->rank         = oldmat->rank;
2910:   a->donotstash   = oldmat->donotstash;
2911:   a->roworiented  = oldmat->roworiented;
2912:   a->rowindices   = NULL;
2913:   a->rowvalues    = NULL;
2914:   a->getrowactive = PETSC_FALSE;

2916:   PetscLayoutReference(matin->rmap, &mat->rmap);
2917:   PetscLayoutReference(matin->cmap, &mat->cmap);

2919:   if (oldmat->colmap) {
2920: #if defined(PETSC_USE_CTABLE)
2921:     PetscTableCreateCopy(oldmat->colmap, &a->colmap);
2922: #else
2923:     PetscMalloc1(mat->cmap->N, &a->colmap);
2924:     PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N);
2925: #endif
2926:   } else a->colmap = NULL;
2927:   if (oldmat->garray) {
2928:     PetscInt len;
2929:     len = oldmat->B->cmap->n;
2930:     PetscMalloc1(len + 1, &a->garray);
2931:     if (len) PetscArraycpy(a->garray, oldmat->garray, len);
2932:   } else a->garray = NULL;

2934:   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2935:      in fact, MatDuplicate only requires the matrix to be preallocated.
2936:      This may happen inside a DMCreateMatrix_Shell */
2937:   if (oldmat->lvec) { VecDuplicate(oldmat->lvec, &a->lvec); }
2938:   if (oldmat->Mvctx) { VecScatterCopy(oldmat->Mvctx, &a->Mvctx); }
2939:   MatDuplicate(oldmat->A, cpvalues, &a->A);
2940:   MatDuplicate(oldmat->B, cpvalues, &a->B);
2941:   PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist);
2942:   *newmat = mat;
2943:   return 0;
2944: }

2946: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2947: {
2948:   PetscBool isbinary, ishdf5;

2952:   /* force binary viewer to load .info file if it has not yet done so */
2953:   PetscViewerSetUp(viewer);
2954:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
2955:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
2956:   if (isbinary) {
2957:     MatLoad_MPIAIJ_Binary(newMat, viewer);
2958:   } else if (ishdf5) {
2959: #if defined(PETSC_HAVE_HDF5)
2960:     MatLoad_AIJ_HDF5(newMat, viewer);
2961: #else
2962:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2963: #endif
2964:   } else {
2965:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2966:   }
2967:   return 0;
2968: }

2970: PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2971: {
2972:   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
2973:   PetscInt    *rowidxs, *colidxs;
2974:   PetscScalar *matvals;

2976:   PetscViewerSetUp(viewer);

2978:   /* read in matrix header */
2979:   PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
2981:   M  = header[1];
2982:   N  = header[2];
2983:   nz = header[3];

2988:   /* set block sizes from the viewer's .info file */
2989:   MatLoad_Binary_BlockSizes(mat, viewer);
2990:   /* set global sizes if not set already */
2991:   if (mat->rmap->N < 0) mat->rmap->N = M;
2992:   if (mat->cmap->N < 0) mat->cmap->N = N;
2993:   PetscLayoutSetUp(mat->rmap);
2994:   PetscLayoutSetUp(mat->cmap);

2996:   /* check if the matrix sizes are correct */
2997:   MatGetSize(mat, &rows, &cols);

3000:   /* read in row lengths and build row indices */
3001:   MatGetLocalSize(mat, &m, NULL);
3002:   PetscMalloc1(m + 1, &rowidxs);
3003:   PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT);
3004:   rowidxs[0] = 0;
3005:   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3006:   MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer));
3008:   /* read in column indices and matrix values */
3009:   PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals);
3010:   PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
3011:   PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
3012:   /* store matrix indices and values */
3013:   MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals);
3014:   PetscFree(rowidxs);
3015:   PetscFree2(colidxs, matvals);
3016:   return 0;
3017: }

3019: /* Not scalable because of ISAllGather() unless getting all columns. */
3020: PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3021: {
3022:   IS          iscol_local;
3023:   PetscBool   isstride;
3024:   PetscMPIInt lisstride = 0, gisstride;

3026:   /* check if we are grabbing all columns */
3027:   PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride);

3029:   if (isstride) {
3030:     PetscInt start, len, mstart, mlen;
3031:     ISStrideGetInfo(iscol, &start, NULL);
3032:     ISGetLocalSize(iscol, &len);
3033:     MatGetOwnershipRangeColumn(mat, &mstart, &mlen);
3034:     if (mstart == start && mlen - mstart == len) lisstride = 1;
3035:   }

3037:   MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat));
3038:   if (gisstride) {
3039:     PetscInt N;
3040:     MatGetSize(mat, NULL, &N);
3041:     ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local);
3042:     ISSetIdentity(iscol_local);
3043:     PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");
3044:   } else {
3045:     PetscInt cbs;
3046:     ISGetBlockSize(iscol, &cbs);
3047:     ISAllGather(iscol, &iscol_local);
3048:     ISSetBlockSize(iscol_local, cbs);
3049:   }

3051:   *isseq = iscol_local;
3052:   return 0;
3053: }

3055: /*
3056:  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3057:  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

3059:  Input Parameters:
3060:    mat - matrix
3061:    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3062:            i.e., mat->rstart <= isrow[i] < mat->rend
3063:    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3064:            i.e., mat->cstart <= iscol[i] < mat->cend
3065:  Output Parameters:
3066:    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3067:    iscol_o - sequential column index set for retrieving mat->B
3068:    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3069:  */
3070: PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3071: {
3072:   Vec             x, cmap;
3073:   const PetscInt *is_idx;
3074:   PetscScalar    *xarray, *cmaparray;
3075:   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3076:   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3077:   Mat             B    = a->B;
3078:   Vec             lvec = a->lvec, lcmap;
3079:   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3080:   MPI_Comm        comm;
3081:   VecScatter      Mvctx = a->Mvctx;

3083:   PetscObjectGetComm((PetscObject)mat, &comm);
3084:   ISGetLocalSize(iscol, &ncols);

3086:   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3087:   MatCreateVecs(mat, &x, NULL);
3088:   VecSet(x, -1.0);
3089:   VecDuplicate(x, &cmap);
3090:   VecSet(cmap, -1.0);

3092:   /* Get start indices */
3093:   MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm);
3094:   isstart -= ncols;
3095:   MatGetOwnershipRangeColumn(mat, &cstart, &cend);

3097:   ISGetIndices(iscol, &is_idx);
3098:   VecGetArray(x, &xarray);
3099:   VecGetArray(cmap, &cmaparray);
3100:   PetscMalloc1(ncols, &idx);
3101:   for (i = 0; i < ncols; i++) {
3102:     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3103:     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3104:     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3105:   }
3106:   VecRestoreArray(x, &xarray);
3107:   VecRestoreArray(cmap, &cmaparray);
3108:   ISRestoreIndices(iscol, &is_idx);

3110:   /* Get iscol_d */
3111:   ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d);
3112:   ISGetBlockSize(iscol, &i);
3113:   ISSetBlockSize(*iscol_d, i);

3115:   /* Get isrow_d */
3116:   ISGetLocalSize(isrow, &m);
3117:   rstart = mat->rmap->rstart;
3118:   PetscMalloc1(m, &idx);
3119:   ISGetIndices(isrow, &is_idx);
3120:   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3121:   ISRestoreIndices(isrow, &is_idx);

3123:   ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d);
3124:   ISGetBlockSize(isrow, &i);
3125:   ISSetBlockSize(*isrow_d, i);

3127:   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3128:   VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD);
3129:   VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD);

3131:   VecDuplicate(lvec, &lcmap);

3133:   VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD);
3134:   VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD);

3136:   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3137:   /* off-process column indices */
3138:   count = 0;
3139:   PetscMalloc1(Bn, &idx);
3140:   PetscMalloc1(Bn, &cmap1);

3142:   VecGetArray(lvec, &xarray);
3143:   VecGetArray(lcmap, &cmaparray);
3144:   for (i = 0; i < Bn; i++) {
3145:     if (PetscRealPart(xarray[i]) > -1.0) {
3146:       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3147:       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3148:       count++;
3149:     }
3150:   }
3151:   VecRestoreArray(lvec, &xarray);
3152:   VecRestoreArray(lcmap, &cmaparray);

3154:   ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o);
3155:   /* cannot ensure iscol_o has same blocksize as iscol! */

3157:   PetscFree(idx);
3158:   *garray = cmap1;

3160:   VecDestroy(&x);
3161:   VecDestroy(&cmap);
3162:   VecDestroy(&lcmap);
3163:   return 0;
3164: }

3166: /* isrow and iscol have the same processor distribution as mat; the output *submat is a submatrix of local mat */
3167: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3168: {
3169:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3170:   Mat         M = NULL;
3171:   MPI_Comm    comm;
3172:   IS          iscol_d, isrow_d, iscol_o;
3173:   Mat         Asub = NULL, Bsub = NULL;
3174:   PetscInt    n;

3176:   PetscObjectGetComm((PetscObject)mat, &comm);

3178:   if (call == MAT_REUSE_MATRIX) {
3179:     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3180:     PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d);

3183:     PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d);

3186:     PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o);

3189:     /* Update diagonal and off-diagonal portions of submat */
3190:     asub = (Mat_MPIAIJ *)(*submat)->data;
3191:     MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A);
3192:     ISGetLocalSize(iscol_o, &n);
3193:     if (n) MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B);
3194:     MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY);
3195:     MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY);

3197:   } else { /* call == MAT_INITIAL_MATRIX) */
3198:     const PetscInt *garray;
3199:     PetscInt        BsubN;

3201:     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3202:     ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray);

3204:     /* Create local submatrices Asub and Bsub */
3205:     MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub);
3206:     MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub);

3208:     /* Create submatrix M */
3209:     MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M);

3211:     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3212:     asub = (Mat_MPIAIJ *)M->data;

3214:     ISGetLocalSize(iscol_o, &BsubN);
3215:     n = asub->B->cmap->N;
3216:     if (BsubN > n) {
3217:       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3218:       const PetscInt *idx;
3219:       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3220:       PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN);

3222:       PetscMalloc1(n, &idx_new);
3223:       j = 0;
3224:       ISGetIndices(iscol_o, &idx);
3225:       for (i = 0; i < n; i++) {
3226:         if (j >= BsubN) break;
3227:         while (subgarray[i] > garray[j]) j++;

3229:         if (subgarray[i] == garray[j]) {
3230:           idx_new[i] = idx[j++];
3231:         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3232:       }
3233:       ISRestoreIndices(iscol_o, &idx);

3235:       ISDestroy(&iscol_o);
3236:       ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o);

3238:     } else if (BsubN < n) {
3239:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3240:     }

3242:     PetscFree(garray);
3243:     *submat = M;

3245:     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3246:     PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d);
3247:     ISDestroy(&isrow_d);

3249:     PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d);
3250:     ISDestroy(&iscol_d);

3252:     PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o);
3253:     ISDestroy(&iscol_o);
3254:   }
3255:   return 0;
3256: }

3258: PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3259: {
3260:   IS        iscol_local = NULL, isrow_d;
3261:   PetscInt  csize;
3262:   PetscInt  n, i, j, start, end;
3263:   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3264:   MPI_Comm  comm;

3266:   /* If isrow has same processor distribution as mat,
3267:      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3268:   if (call == MAT_REUSE_MATRIX) {
3269:     PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d);
3270:     if (isrow_d) {
3271:       sameRowDist  = PETSC_TRUE;
3272:       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3273:     } else {
3274:       PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local);
3275:       if (iscol_local) {
3276:         sameRowDist  = PETSC_TRUE;
3277:         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3278:       }
3279:     }
3280:   } else {
3281:     /* Check if isrow has same processor distribution as mat */
3282:     sameDist[0] = PETSC_FALSE;
3283:     ISGetLocalSize(isrow, &n);
3284:     if (!n) {
3285:       sameDist[0] = PETSC_TRUE;
3286:     } else {
3287:       ISGetMinMax(isrow, &i, &j);
3288:       MatGetOwnershipRange(mat, &start, &end);
3289:       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3290:     }

3292:     /* Check if iscol has same processor distribution as mat */
3293:     sameDist[1] = PETSC_FALSE;
3294:     ISGetLocalSize(iscol, &n);
3295:     if (!n) {
3296:       sameDist[1] = PETSC_TRUE;
3297:     } else {
3298:       ISGetMinMax(iscol, &i, &j);
3299:       MatGetOwnershipRangeColumn(mat, &start, &end);
3300:       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3301:     }

3303:     PetscObjectGetComm((PetscObject)mat, &comm);
3304:     MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm);
3305:     sameRowDist = tsameDist[0];
3306:   }

3308:   if (sameRowDist) {
3309:     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3310:       /* isrow and iscol have same processor distribution as mat */
3311:       MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat);
3312:       return 0;
3313:     } else { /* sameRowDist */
3314:       /* isrow has same processor distribution as mat */
3315:       if (call == MAT_INITIAL_MATRIX) {
3316:         PetscBool sorted;
3317:         ISGetSeqIS_Private(mat, iscol, &iscol_local);
3318:         ISGetLocalSize(iscol_local, &n); /* local size of iscol_local = global columns of newmat */
3319:         ISGetSize(iscol, &i);

3322:         ISSorted(iscol_local, &sorted);
3323:         if (sorted) {
3324:           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3325:           MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat);
3326:           return 0;
3327:         }
3328:       } else { /* call == MAT_REUSE_MATRIX */
3329:         IS iscol_sub;
3330:         PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub);
3331:         if (iscol_sub) {
3332:           MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat);
3333:           return 0;
3334:         }
3335:       }
3336:     }
3337:   }

3339:   /* General case: iscol -> iscol_local which has global size of iscol */
3340:   if (call == MAT_REUSE_MATRIX) {
3341:     PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local);
3343:   } else {
3344:     if (!iscol_local) ISGetSeqIS_Private(mat, iscol, &iscol_local);
3345:   }

3347:   ISGetLocalSize(iscol, &csize);
3348:   MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat);

3350:   if (call == MAT_INITIAL_MATRIX) {
3351:     PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local);
3352:     ISDestroy(&iscol_local);
3353:   }
3354:   return 0;
3355: }
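/*
   A hedged usage sketch (not from the PETSc sources): users normally reach MatCreateSubMatrix_MPIAIJ()
   through the public MatCreateSubMatrix() interface. The fragment below extracts the locally owned
   rows and columns of an assembled MATMPIAIJ matrix A, which takes the MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   path chosen above. PetscInitialize(), the construction of A, and error checking are omitted for brevity.

     #include <petscmat.h>

     IS       isrow, iscol;
     Mat      Asub;
     PetscInt rstart, rend, cstart, cend;

     MatGetOwnershipRange(A, &rstart, &rend);
     MatGetOwnershipRangeColumn(A, &cstart, &cend);
     ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow);  // locally owned rows
     ISCreateStride(PETSC_COMM_WORLD, cend - cstart, cstart, 1, &iscol);  // locally owned columns
     MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &Asub);
     // with the same index sets, the extraction can later be refreshed in place
     MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &Asub);
     ISDestroy(&isrow);
     ISDestroy(&iscol);
     MatDestroy(&Asub);
*/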

3357: /*@C
3358:      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3359:          and "off-diagonal" part of the matrix in CSR format.

3361:    Collective

3363:    Input Parameters:
3364: +  comm - MPI communicator
3365: .  A - "diagonal" portion of matrix
3366: .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3367: -  garray - global index of B columns

3369:    Output Parameter:
3370: .   mat - the matrix, with input A as its local diagonal matrix
3371:    Level: advanced

3373:    Notes:
3374:    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

3376:    A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B after this call.

3378: .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3379: @*/
3380: PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3381: {
3382:   Mat_MPIAIJ        *maij;
3383:   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3384:   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3385:   const PetscScalar *oa;
3386:   Mat                Bnew;
3387:   PetscInt           m, n, N;
3388:   MatType            mpi_mat_type;

3390:   MatCreate(comm, mat);
3391:   MatGetSize(A, &m, &n);
3394:   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its block size may not be the same as A's */

3397:   /* Get global columns of mat */
3398:   MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm);

3400:   MatSetSizes(*mat, m, n, PETSC_DECIDE, N);
3401:   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3402:   MatGetMPIMatType_Private(A, &mpi_mat_type);
3403:   MatSetType(*mat, mpi_mat_type);

3405:   MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs);
3406:   maij = (Mat_MPIAIJ *)(*mat)->data;

3408:   (*mat)->preallocated = PETSC_TRUE;

3410:   PetscLayoutSetUp((*mat)->rmap);
3411:   PetscLayoutSetUp((*mat)->cmap);

3413:   /* Set A as diagonal portion of *mat */
3414:   maij->A = A;

3416:   nz = oi[m];
3417:   for (i = 0; i < nz; i++) {
3418:     col   = oj[i];
3419:     oj[i] = garray[col];
3420:   }

3422:   /* Set Bnew as off-diagonal portion of *mat */
3423:   MatSeqAIJGetArrayRead(B, &oa);
3424:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew);
3425:   MatSeqAIJRestoreArrayRead(B, &oa);
3426:   bnew        = (Mat_SeqAIJ *)Bnew->data;
3427:   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3428:   maij->B     = Bnew;


3432:   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3433:   b->free_a       = PETSC_FALSE;
3434:   b->free_ij      = PETSC_FALSE;
3435:   MatDestroy(&B);

3437:   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3438:   bnew->free_a       = PETSC_TRUE;
3439:   bnew->free_ij      = PETSC_TRUE;

3441:   /* condense columns of maij->B */
3442:   MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
3443:   MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY);
3444:   MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY);
3445:   MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE);
3446:   MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
3447:   return 0;
3448: }
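/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() (not from the PETSc sources; it assumes
   exactly 2 MPI processes and omits PetscInitialize() and error checking). Each rank builds a 2x2
   "diagonal" block Ad and a 2x2 "off-diagonal" block Ao whose local column indices are mapped to
   global columns through garray[], giving a 4x4 MATMPIAIJ matrix.

     #include <petscmat.h>

     Mat         Ad, Ao, M;
     PetscMPIInt rank;
     PetscInt    garray[2], row;

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     MatCreateSeqAIJ(PETSC_COMM_SELF, 2, 2, 1, NULL, &Ad);
     MatCreateSeqAIJ(PETSC_COMM_SELF, 2, 2, 1, NULL, &Ao);
     for (row = 0; row < 2; row++) {
       MatSetValue(Ad, row, row, (PetscScalar)(rank + 1), INSERT_VALUES);  // entry in the diagonal block
       MatSetValue(Ao, row, row, (PetscScalar)(rank + 10), INSERT_VALUES); // off-diagonal entry, local column index
     }
     MatAssemblyBegin(Ad, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(Ad, MAT_FINAL_ASSEMBLY);
     MatAssemblyBegin(Ao, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(Ao, MAT_FINAL_ASSEMBLY);
     garray[0] = rank ? 0 : 2; // global columns addressed by the two local columns of Ao
     garray[1] = rank ? 1 : 3;
     MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Ad, Ao, garray, &M);
     // Ad is now owned by M and Ao has been destroyed; only M may be used from here on
     MatDestroy(&M);
*/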

3450: extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

3452: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3453: {
3454:   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3455:   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3456:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3457:   Mat             M, Msub, B = a->B;
3458:   MatScalar      *aa;
3459:   Mat_SeqAIJ     *aij;
3460:   PetscInt       *garray = a->garray, *colsub, Ncols;
3461:   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3462:   IS              iscol_sub, iscmap;
3463:   const PetscInt *is_idx, *cmap;
3464:   PetscBool       allcolumns = PETSC_FALSE;
3465:   MPI_Comm        comm;

3467:   PetscObjectGetComm((PetscObject)mat, &comm);
3468:   if (call == MAT_REUSE_MATRIX) {
3469:     PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub);
3471:     ISGetLocalSize(iscol_sub, &count);

3473:     PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap);

3476:     PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub);

3479:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub);

3481:   } else { /* call == MAT_INITIAL_MATRIX) */
3482:     PetscBool flg;

3484:     ISGetLocalSize(iscol, &n);
3485:     ISGetSize(iscol, &Ncols);

3487:     /* (1) iscol -> nonscalable iscol_local */
3488:     /* Check for special case: each processor gets entire matrix columns */
3489:     ISIdentity(iscol_local, &flg);
3490:     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3491:     MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat));
3492:     if (allcolumns) {
3493:       iscol_sub = iscol_local;
3494:       PetscObjectReference((PetscObject)iscol_local);
3495:       ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap);

3497:     } else {
3498:       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3499:       PetscInt *idx, *cmap1, k;
3500:       PetscMalloc1(Ncols, &idx);
3501:       PetscMalloc1(Ncols, &cmap1);
3502:       ISGetIndices(iscol_local, &is_idx);
3503:       count = 0;
3504:       k     = 0;
3505:       for (i = 0; i < Ncols; i++) {
3506:         j = is_idx[i];
3507:         if (j >= cstart && j < cend) {
3508:           /* diagonal part of mat */
3509:           idx[count]     = j;
3510:           cmap1[count++] = i; /* column index in submat */
3511:         } else if (Bn) {
3512:           /* off-diagonal part of mat */
3513:           if (j == garray[k]) {
3514:             idx[count]     = j;
3515:             cmap1[count++] = i; /* column index in submat */
3516:           } else if (j > garray[k]) {
3517:             while (j > garray[k] && k < Bn - 1) k++;
3518:             if (j == garray[k]) {
3519:               idx[count]     = j;
3520:               cmap1[count++] = i; /* column index in submat */
3521:             }
3522:           }
3523:         }
3524:       }
3525:       ISRestoreIndices(iscol_local, &is_idx);

3527:       ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub);
3528:       ISGetBlockSize(iscol, &cbs);
3529:       ISSetBlockSize(iscol_sub, cbs);

3531:       ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap);
3532:     }

3534:     /* (3) Create sequential Msub */
3535:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub);
3536:   }

3538:   ISGetLocalSize(iscol_sub, &count);
3539:   aij = (Mat_SeqAIJ *)(Msub)->data;
3540:   ii  = aij->i;
3541:   ISGetIndices(iscmap, &cmap);

3543:   /*
3544:       m - number of local rows
3545:       Ncols - number of columns (same on all processors)
3546:       rstart - first row in new global matrix generated
3547:   */
3548:   MatGetSize(Msub, &m, NULL);

3550:   if (call == MAT_INITIAL_MATRIX) {
3551:     /* (4) Create parallel newmat */
3552:     PetscMPIInt rank, size;
3553:     PetscInt    csize;

3555:     MPI_Comm_size(comm, &size);
3556:     MPI_Comm_rank(comm, &rank);

3558:     /*
3559:         Determine the number of non-zeros in the diagonal and off-diagonal
3560:         portions of the matrix in order to do correct preallocation
3561:     */

3563:     /* first get start and end of "diagonal" columns */
3564:     ISGetLocalSize(iscol, &csize);
3565:     if (csize == PETSC_DECIDE) {
3566:       ISGetSize(isrow, &mglobal);
3567:       if (mglobal == Ncols) { /* square matrix */
3568:         nlocal = m;
3569:       } else {
3570:         nlocal = Ncols / size + ((Ncols % size) > rank);
3571:       }
3572:     } else {
3573:       nlocal = csize;
3574:     }
3575:     MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm);
3576:     rstart = rend - nlocal;

3579:     /* next, compute all the lengths */
3580:     jj = aij->j;
3581:     PetscMalloc1(2 * m + 1, &dlens);
3582:     olens = dlens + m;
3583:     for (i = 0; i < m; i++) {
3584:       jend = ii[i + 1] - ii[i];
3585:       olen = 0;
3586:       dlen = 0;
3587:       for (j = 0; j < jend; j++) {
3588:         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3589:         else dlen++;
3590:         jj++;
3591:       }
3592:       olens[i] = olen;
3593:       dlens[i] = dlen;
3594:     }

3596:     ISGetBlockSize(isrow, &bs);
3597:     ISGetBlockSize(iscol, &cbs);

3599:     MatCreate(comm, &M);
3600:     MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols);
3601:     MatSetBlockSizes(M, bs, cbs);
3602:     MatSetType(M, ((PetscObject)mat)->type_name);
3603:     MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens);
3604:     PetscFree(dlens);

3606:   } else { /* call == MAT_REUSE_MATRIX */
3607:     M = *newmat;
3608:     MatGetLocalSize(M, &i, NULL);
3610:     MatZeroEntries(M);
3611:     /*
3612:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3613:        rather than the slower MatSetValues().
3614:     */
3615:     M->was_assembled = PETSC_TRUE;
3616:     M->assembled     = PETSC_FALSE;
3617:   }

3619:   /* (5) Set values of Msub to *newmat */
3620:   PetscMalloc1(count, &colsub);
3621:   MatGetOwnershipRange(M, &rstart, NULL);

3623:   jj = aij->j;
3624:   MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa);
3625:   for (i = 0; i < m; i++) {
3626:     row = rstart + i;
3627:     nz  = ii[i + 1] - ii[i];
3628:     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3629:     MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES);
3630:     jj += nz;
3631:     aa += nz;
3632:   }
3633:   MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa);
3634:   ISRestoreIndices(iscmap, &cmap);

3636:   MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY);
3637:   MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY);

3639:   PetscFree(colsub);

3641:   /* save Msub, iscol_sub and iscmap used in processor for next request */
3642:   if (call == MAT_INITIAL_MATRIX) {
3643:     *newmat = M;
3644:     PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub);
3645:     MatDestroy(&Msub);

3647:     PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub);
3648:     ISDestroy(&iscol_sub);

3650:     PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap);
3651:     ISDestroy(&iscmap);

3653:     if (iscol_local) {
3654:       PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local);
3655:       ISDestroy(&iscol_local);
3656:     }
3657:   }
3658:   return 0;
3659: }

3661: /*
3662:     Not great since it makes two copies of the submatrix: first a SeqAIJ
3663:   locally, and then the end result by concatenating the local matrices.
3664:   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

3666:   This requires a sequential iscol with all indices.
3667: */
3668: PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3669: {
3670:   PetscMPIInt rank, size;
3671:   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3672:   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3673:   Mat         M, Mreuse;
3674:   MatScalar  *aa, *vwork;
3675:   MPI_Comm    comm;
3676:   Mat_SeqAIJ *aij;
3677:   PetscBool   colflag, allcolumns = PETSC_FALSE;

3679:   PetscObjectGetComm((PetscObject)mat, &comm);
3680:   MPI_Comm_rank(comm, &rank);
3681:   MPI_Comm_size(comm, &size);

3683:   /* Check for special case: each processor gets entire matrix columns */
3684:   ISIdentity(iscol, &colflag);
3685:   ISGetLocalSize(iscol, &n);
3686:   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3687:   MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat));

3689:   if (call == MAT_REUSE_MATRIX) {
3690:     PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse);
3692:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse);
3693:   } else {
3694:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse);
3695:   }

3697:   /*
3698:       m - number of local rows
3699:       n - number of columns (same on all processors)
3700:       rstart - first row in new global matrix generated
3701:   */
3702:   MatGetSize(Mreuse, &m, &n);
3703:   MatGetBlockSizes(Mreuse, &bs, &cbs);
3704:   if (call == MAT_INITIAL_MATRIX) {
3705:     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3706:     ii  = aij->i;
3707:     jj  = aij->j;

3709:     /*
3710:         Determine the number of non-zeros in the diagonal and off-diagonal
3711:         portions of the matrix in order to do correct preallocation
3712:     */

3714:     /* first get start and end of "diagonal" columns */
3715:     if (csize == PETSC_DECIDE) {
3716:       ISGetSize(isrow, &mglobal);
3717:       if (mglobal == n) { /* square matrix */
3718:         nlocal = m;
3719:       } else {
3720:         nlocal = n / size + ((n % size) > rank);
3721:       }
3722:     } else {
3723:       nlocal = csize;
3724:     }
3725:     MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm);
3726:     rstart = rend - nlocal;

3729:     /* next, compute all the lengths */
3730:     PetscMalloc1(2 * m + 1, &dlens);
3731:     olens = dlens + m;
3732:     for (i = 0; i < m; i++) {
3733:       jend = ii[i + 1] - ii[i];
3734:       olen = 0;
3735:       dlen = 0;
3736:       for (j = 0; j < jend; j++) {
3737:         if (*jj < rstart || *jj >= rend) olen++;
3738:         else dlen++;
3739:         jj++;
3740:       }
3741:       olens[i] = olen;
3742:       dlens[i] = dlen;
3743:     }
3744:     MatCreate(comm, &M);
3745:     MatSetSizes(M, m, nlocal, PETSC_DECIDE, n);
3746:     MatSetBlockSizes(M, bs, cbs);
3747:     MatSetType(M, ((PetscObject)mat)->type_name);
3748:     MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens);
3749:     PetscFree(dlens);
3750:   } else {
3751:     PetscInt ml, nl;

3753:     M = *newmat;
3754:     MatGetLocalSize(M, &ml, &nl);
3756:     MatZeroEntries(M);
3757:     /*
3758:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3759:        rather than the slower MatSetValues().
3760:     */
3761:     M->was_assembled = PETSC_TRUE;
3762:     M->assembled     = PETSC_FALSE;
3763:   }
3764:   MatGetOwnershipRange(M, &rstart, &rend);
3765:   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3766:   ii  = aij->i;
3767:   jj  = aij->j;

3769:   /* trigger copy to CPU if needed */
3770:   MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa);
3771:   for (i = 0; i < m; i++) {
3772:     row   = rstart + i;
3773:     nz    = ii[i + 1] - ii[i];
3774:     cwork = jj;
3775:     jj += nz;
3776:     vwork = aa;
3777:     aa += nz;
3778:     MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES);
3779:   }
3780:   MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa);

3782:   MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY);
3783:   MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY);
3784:   *newmat = M;

3786:   /* save submatrix used in processor for next request */
3787:   if (call == MAT_INITIAL_MATRIX) {
3788:     PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse);
3789:     MatDestroy(&Mreuse);
3790:   }
3791:   return 0;
3792: }

3794: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3795: {
3796:   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3797:   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3798:   const PetscInt *JJ;
3799:   PetscBool       nooffprocentries;
3800:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;


3804:   PetscLayoutSetUp(B->rmap);
3805:   PetscLayoutSetUp(B->cmap);
3806:   m      = B->rmap->n;
3807:   cstart = B->cmap->rstart;
3808:   cend   = B->cmap->rend;
3809:   rstart = B->rmap->rstart;

3811:   PetscCalloc2(m, &d_nnz, m, &o_nnz);

3813:   if (PetscDefined(USE_DEBUG)) {
3814:     for (i = 0; i < m; i++) {
3815:       nnz = Ii[i + 1] - Ii[i];
3816:       JJ  = J + Ii[i];
3820:     }
3821:   }

3823:   for (i = 0; i < m; i++) {
3824:     nnz     = Ii[i + 1] - Ii[i];
3825:     JJ      = J + Ii[i];
3826:     nnz_max = PetscMax(nnz_max, nnz);
3827:     d       = 0;
3828:     for (j = 0; j < nnz; j++) {
3829:       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3830:     }
3831:     d_nnz[i] = d;
3832:     o_nnz[i] = nnz - d;
3833:   }
3834:   MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
3835:   PetscFree2(d_nnz, o_nnz);

3837:   for (i = 0; i < m; i++) {
3838:     ii = i + rstart;
3839:     MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES);
3840:   }
3841:   nooffprocentries    = B->nooffprocentries;
3842:   B->nooffprocentries = PETSC_TRUE;
3843:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
3844:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
3845:   B->nooffprocentries = nooffprocentries;

3847:   /* count number of entries below block diagonal */
3848:   PetscFree(Aij->ld);
3849:   PetscCalloc1(m, &ld);
3850:   Aij->ld = ld;
3851:   for (i = 0; i < m; i++) {
3852:     nnz = Ii[i + 1] - Ii[i];
3853:     j   = 0;
3854:     while (j < nnz && J[j] < cstart) j++;
3855:     ld[i] = j;
3856:     J += nnz;
3857:   }

3859:   MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
3860:   return 0;
3861: }

3863: /*@
3864:    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3865:    (the default parallel PETSc format).

3867:    Collective

3869:    Input Parameters:
3870: +  B - the matrix
3871: .  i - the indices into j for the start of each local row (starts with zero)
3872: .  j - the column indices for each local row (starts with zero)
3873: -  v - optional values in the matrix

3875:    Level: developer

3877:    Notes:
3878:        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3879:      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3880:      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

3882:        The i and j indices are 0 based, and the i indices are offsets into the local j array.

3884:        The format used for the sparse matrix input is equivalent to a
3885:     row-major ordering, i.e., for the following matrix, the expected input data is
3886:     as shown

3888: $        1 0 0
3889: $        2 0 3     P0
3890: $       -------
3891: $        4 5 6     P1
3892: $
3893: $     Process0 [P0]: rows_owned=[0,1]
3894: $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3895: $        j =  {0,0,2}  [size = 3]
3896: $        v =  {1,2,3}  [size = 3]
3897: $
3898: $     Process1 [P1]: rows_owned=[2]
3899: $        i =  {0,3}    [size = nrow+1  = 1+1]
3900: $        j =  {0,1,2}  [size = 3]
3901: $        v =  {4,5,6}  [size = 3]

3903: .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3904:           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3905: @*/
3906: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3907: {
3908:   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3909:   return 0;
3910: }
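/*
   A minimal usage sketch for MatMPIAIJSetPreallocationCSR() (not from the PETSc sources) matching
   the 3x3 example in the manual page above; it assumes exactly 2 MPI processes and omits
   PetscInitialize() and error checking.

     #include <petscmat.h>

     Mat               B;
     PetscMPIInt       rank;
     const PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};  // P0: rows 0 and 1
     const PetscScalar v0[] = {1, 2, 3};
     const PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2};  // P1: row 2
     const PetscScalar v1[] = {4, 5, 6};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     MatCreate(PETSC_COMM_WORLD, &B);
     MatSetSizes(B, rank ? 1 : 2, PETSC_DECIDE, 3, 3);
     MatSetType(B, MATMPIAIJ);
     if (!rank) MatMPIAIJSetPreallocationCSR(B, i0, j0, v0);
     else       MatMPIAIJSetPreallocationCSR(B, i1, j1, v1);
     // B is now preallocated, filled with the values from v, and assembled
     MatDestroy(&B);
*/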

3912: /*@C
3913:    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3914:    (the default parallel PETSc format).  For good matrix assembly performance
3915:    the user should preallocate the matrix storage by setting the parameters
3916:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3917:    performance can be increased by more than a factor of 50.

3919:    Collective

3921:    Input Parameters:
3922: +  B - the matrix
3923: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3924:            (same value is used for all local rows)
3925: .  d_nnz - array containing the number of nonzeros in the various rows of the
3926:            DIAGONAL portion of the local submatrix (possibly different for each row)
3927:            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
3928:            The size of this array is equal to the number of local rows, i.e 'm'.
3929:            For matrices that will be factored, you must leave room for (and set)
3930:            the diagonal entry even if it is zero.
3931: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3932:            submatrix (same value is used for all local rows).
3933: -  o_nnz - array containing the number of nonzeros in the various rows of the
3934:            OFF-DIAGONAL portion of the local submatrix (possibly different for
3935:            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
3936:            structure. The size of this array is equal to the number
3937:            of local rows, i.e 'm'.

3939:    If the *_nnz parameter is given then the *_nz parameter is ignored

3941:    The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
3942:    storage.  The stored row and column indices begin with zero.
3943:    See [Sparse Matrices](sec_matsparse) for details.

3945:    The parallel matrix is partitioned such that the first m0 rows belong to
3946:    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

3949:    The DIAGONAL portion of the local submatrix of a processor can be defined
3950:    as the submatrix which is obtained by extracting the part corresponding to
3951:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952:    first row that belongs to the processor, r2 is the last row belonging to
3953:    this processor, and c1-c2 is the range of indices of the local part of a
3954:    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3955:    common case of a square matrix, the row and column ranges are the same and
3956:    the DIAGONAL part is also square. The remaining portion of the local
3957:    submatrix (mxN) constitutes the OFF-DIAGONAL portion.

3959:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

3961:    You can call MatGetInfo() to get information on how effective the preallocation was;
3962:    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3963:    You can also run with the option -info and look for messages with the string
3964:    malloc in them to see if additional memory allocation was needed.

3966:    Example usage:

3968:    Consider the following 8x8 matrix with 34 non-zero values that is
3969:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3970:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3971:    as follows:

3973: .vb
3974:             1  2  0  |  0  3  0  |  0  4
3975:     Proc0   0  5  6  |  7  0  0  |  8  0
3976:             9  0 10  | 11  0  0  | 12  0
3977:     -------------------------------------
3978:            13  0 14  | 15 16 17  |  0  0
3979:     Proc1   0 18  0  | 19 20 21  |  0  0
3980:             0  0  0  | 22 23  0  | 24  0
3981:     -------------------------------------
3982:     Proc2  25 26 27  |  0  0 28  | 29  0
3983:            30  0  0  | 31 32 33  |  0 34
3984: .ve

3986:    This can be represented as a collection of submatrices as:

3988: .vb
3989:       A B C
3990:       D E F
3991:       G H I
3992: .ve

3994:    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995:    owned by proc1, G,H,I are owned by proc2.

3997:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999:    The 'M','N' parameters are 8,8, and have the same values on all procs.

4001:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4004:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4005:    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4006:    matrix, and [DF] as another `MATSEQAIJ` matrix.

4008:    When d_nz, o_nz parameters are specified, d_nz storage elements are
4009:    allocated for every row of the local diagonal submatrix, and o_nz
4010:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4011:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4012:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4013:    In this case, the values of d_nz,o_nz are:
4014: .vb
4015:      proc0 : d_nz = 2, o_nz = 2
4016:      proc1 : d_nz = 3, o_nz = 2
4017:      proc2 : d_nz = 1, o_nz = 4
4018: .ve
4019:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4022:    34 values.

4024:    When d_nnz, o_nnz parameters are specified, the storage is specified
4025:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026:    In the above case the values for d_nnz,o_nnz are:
4027: .vb
4028:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031: .ve
4032:    Here the space allocated is the sum of all the above values, i.e., 34, and
4033:    hence the preallocation is perfect.

4035:    Level: intermediate

4037: .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4038:           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4039: @*/
4040: PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4041: {
4044:   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4045:   return 0;
4046: }
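/*
   A minimal usage sketch for MatMPIAIJSetPreallocation() (not from the PETSc sources) using the
   d_nnz/o_nnz values of the 8x8 example in the manual page above; it assumes exactly 3 MPI
   processes and omits PetscInitialize() and error checking. The trailing zeros in the proc2 rows
   of the tables are padding only; each rank reads just its first m[rank] entries.

     #include <petscmat.h>

     Mat            A;
     PetscMPIInt    rank;
     const PetscInt m[3]        = {3, 3, 2};
     const PetscInt d_nnz[3][3] = {{2, 2, 2}, {3, 3, 2}, {1, 1, 0}};
     const PetscInt o_nnz[3][3] = {{2, 2, 2}, {2, 1, 1}, {4, 4, 0}};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, m[rank], m[rank], 8, 8);
     MatSetType(A, MATMPIAIJ);
     MatMPIAIJSetPreallocation(A, 0, d_nnz[rank], 0, o_nnz[rank]);
     // fill with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() as usual
     MatDestroy(&A);
*/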

4048: /*@
4049:      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4050:          rows in standard CSR format.

4052:    Collective

4054:    Input Parameters:
4055: +  comm - MPI communicator
4056: .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4057: .  n - This value should be the same as the local size used in creating the
4058:        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4059:        calculated if N is given) For square matrices n is almost always m.
4060: .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4061: .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4062: .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4063: .   j - column indices
4064: -   a - optional matrix values

4066:    Output Parameter:
4067: .   mat - the matrix

4069:    Level: intermediate

4071:    Notes:
4072:        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4073:      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4074:      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

4076:        The i and j indices are 0 based, and the i indices are offsets into the local j array.

4078:        The format used for the sparse matrix input is equivalent to a
4079:     row-major ordering, i.e., for the following matrix, the expected input data is
4080:     as shown

4082:        Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`.

4084: $        1 0 0
4085: $        2 0 3     P0
4086: $       -------
4087: $        4 5 6     P1
4088: $
4089: $     Process0 [P0]: rows_owned=[0,1]
4090: $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4091: $        j =  {0,0,2}  [size = 3]
4092: $        v =  {1,2,3}  [size = 3]
4093: $
4094: $     Process1 [P1]: rows_owned=[2]
4095: $        i =  {0,3}    [size = nrow+1  = 1+1]
4096: $        j =  {0,1,2}  [size = 3]
4097: $        v =  {4,5,6}  [size = 3]

4099: .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4100:           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4101: @*/
4102: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4103: {
4106:   MatCreate(comm, mat);
4107:   MatSetSizes(*mat, m, n, M, N);
4108:   /* MatSetBlockSizes(M,bs,cbs); */
4109:   MatSetType(*mat, MATMPIAIJ);
4110:   MatMPIAIJSetPreallocationCSR(*mat, i, j, a);
4111:   return 0;
4112: }
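/*
   A minimal usage sketch for MatCreateMPIAIJWithArrays() (not from the PETSc sources) matching the
   3x3 example in the manual page above; it assumes exactly 2 MPI processes and omits
   PetscInitialize() and error checking.

     #include <petscmat.h>

     Mat               A;
     PetscMPIInt       rank;
     const PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};  // P0: rows 0 and 1
     const PetscScalar a0[] = {1, 2, 3};
     const PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2};  // P1: row 2
     const PetscScalar a1[] = {4, 5, 6};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     if (!rank) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, 3, 3, i0, j0, a0, &A);
     else       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 1, PETSC_DECIDE, 3, 3, i1, j1, a1, &A);
     // the arrays are copied, so they may be modified or freed after this call
     MatDestroy(&A);
*/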

4114: /*@
4115:      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4116:          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to `MatCreateMPIAIJWithArrays()`

4118:      Deprecated: Use `MatUpdateMPIAIJWithArray()`

4120:    Collective

4122:    Input Parameters:
4123: +  mat - the matrix
4124: .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4125: .  n - This value should be the same as the local size used in creating the
4126:        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4127:        calculated if N is given) For square matrices n is almost always m.
4128: .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4129: .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4130: .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4131: .  J - column indices
4132: -  v - matrix values

4134:    Level: intermediate

4136: .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4137:           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4138: @*/
4139: PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4140: {
4141:   PetscInt        nnz, i;
4142:   PetscBool       nooffprocentries;
4143:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4144:   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4145:   PetscScalar    *ad, *ao;
4146:   PetscInt        ldi, Iii, md;
4147:   const PetscInt *Adi = Ad->i;
4148:   PetscInt       *ld  = Aij->ld;


4155:   MatSeqAIJGetArrayWrite(Aij->A, &ad);
4156:   MatSeqAIJGetArrayWrite(Aij->B, &ao);

4158:   for (i = 0; i < m; i++) {
4159:     nnz = Ii[i + 1] - Ii[i];
4160:     Iii = Ii[i];
4161:     ldi = ld[i];
4162:     md  = Adi[i + 1] - Adi[i];
4163:     PetscArraycpy(ao, v + Iii, ldi);
4164:     PetscArraycpy(ad, v + Iii + ldi, md);
4165:     PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md);
4166:     ad += md;
4167:     ao += nnz - md;
4168:   }
4169:   nooffprocentries      = mat->nooffprocentries;
4170:   mat->nooffprocentries = PETSC_TRUE;
4171:   MatSeqAIJRestoreArrayWrite(Aij->A, &ad);
4172:   MatSeqAIJRestoreArrayWrite(Aij->B, &ao);
4173:   PetscObjectStateIncrease((PetscObject)Aij->A);
4174:   PetscObjectStateIncrease((PetscObject)Aij->B);
4175:   PetscObjectStateIncrease((PetscObject)mat);
4176:   MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
4177:   MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
4178:   mat->nooffprocentries = nooffprocentries;
4179:   return 0;
4180: }

4182: /*@
4183:      MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values

4185:    Collective

4187:    Input Parameters:
4188: +  mat - the matrix
4189: -  v - matrix values, stored by row

4191:    Level: intermediate

4193:    Note:
4194:    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

4196: .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4197:           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4198: @*/
4199: PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4200: {
4201:   PetscInt        nnz, i, m;
4202:   PetscBool       nooffprocentries;
4203:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4204:   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4205:   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4206:   PetscScalar    *ad, *ao;
4207:   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4208:   PetscInt        ldi, Iii, md;
4209:   PetscInt       *ld = Aij->ld;

4211:   m = mat->rmap->n;

4213:   MatSeqAIJGetArrayWrite(Aij->A, &ad);
4214:   MatSeqAIJGetArrayWrite(Aij->B, &ao);
4215:   Iii = 0;
4216:   for (i = 0; i < m; i++) {
4217:     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4218:     ldi = ld[i];
4219:     md  = Adi[i + 1] - Adi[i];
4220:     PetscArraycpy(ao, v + Iii, ldi);
4221:     PetscArraycpy(ad, v + Iii + ldi, md);
4222:     PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md);
4223:     ad += md;
4224:     ao += nnz - md;
4225:     Iii += nnz;
4226:   }
4227:   nooffprocentries      = mat->nooffprocentries;
4228:   mat->nooffprocentries = PETSC_TRUE;
4229:   MatSeqAIJRestoreArrayWrite(Aij->A, &ad);
4230:   MatSeqAIJRestoreArrayWrite(Aij->B, &ao);
4231:   PetscObjectStateIncrease((PetscObject)Aij->A);
4232:   PetscObjectStateIncrease((PetscObject)Aij->B);
4233:   PetscObjectStateIncrease((PetscObject)mat);
4234:   MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
4235:   MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
4236:   mat->nooffprocentries = nooffprocentries;
4237:   return 0;
4238: }

4240: /*@C
4241:    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4242:    (the default parallel PETSc format).  For good matrix assembly performance
4243:    the user should preallocate the matrix storage by setting the parameters
4244:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4245:    performance can be increased by more than a factor of 50.

4247:    Collective

4249:    Input Parameters:
4250: +  comm - MPI communicator
4251: .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
4252:            This value should be the same as the local size used in creating the
4253:            y vector for the matrix-vector product y = Ax.
4254: .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
4255:        This value should be the same as the local size used in creating the
4256:        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4257: .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4258: .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4259: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4260:            (same value is used for all local rows)
4261: .  d_nnz - array containing the number of nonzeros in the various rows of the
4262:            DIAGONAL portion of the local submatrix (possibly different for each row)
4263:            or NULL, if d_nz is used to specify the nonzero structure.
4264:            The size of this array is equal to the number of local rows, i.e. 'm'.
4265: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4266:            submatrix (same value is used for all local rows).
4267: -  o_nnz - array containing the number of nonzeros in the various rows of the
4268:            OFF-DIAGONAL portion of the local submatrix (possibly different for
4269:            each row) or NULL, if o_nz is used to specify the nonzero
4270:            structure. The size of this array is equal to the number
4271:            of local rows, i.e. 'm'.

4273:    Output Parameter:
4274: .  A - the matrix

4276:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4277:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4278:    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

4280:    Notes:
4281:    If the *_nnz parameter is given then the *_nz parameter is ignored

4283:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4284:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4285:    storage requirements for this matrix.

4287:    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4288:    processor then it must be used on all processors that share the object for
4289:    that argument.

4291:    The user MUST specify either the local or global matrix dimensions
4292:    (possibly both).

4294:    The parallel matrix is partitioned across processors such that the
4295:    first m0 rows belong to process 0, the next m1 rows belong to
4296:    process 1, the next m2 rows belong to process 2 etc.. where
4297:    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4298:    values corresponding to [m x N] submatrix.

4300:    The columns are logically partitioned with the n0 columns belonging
4301:    to the 0th partition, the next n1 columns belonging to the next
4302:    partition, etc., where n0,n1,n2,... are the input parameter 'n'.

4304:    The DIAGONAL portion of the local submatrix on any given processor
4305:    is the submatrix corresponding to the rows and columns m,n
4306:    owned by the given processor, i.e. the diagonal matrix on
4307:    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4308:    etc. The remaining portion of the local submatrix, [m x (N-n)],
4309:    constitutes the OFF-DIAGONAL portion. The example below better
4310:    illustrates this concept.

4312:    For a square global matrix we define each processor's diagonal portion
4313:    to be its local rows and the corresponding columns (a square submatrix);
4314:    each processor's off-diagonal portion encompasses the remainder of the
4315:    local matrix (a rectangular submatrix).

4317:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

4319:    When calling this routine with a single process communicator, a matrix of
4320:    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4321:    type of communicator, use the construction mechanism
4322: .vb
4323:      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4324: .ve

4331:    By default, this format uses inodes (identical nodes) when possible.
4332:    We search for consecutive rows with the same nonzero structure, thereby
4333:    reusing matrix information to achieve increased efficiency.

4335:    Options Database Keys:
4336: +  -mat_no_inode  - Do not use inodes
4337: .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4338: -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4339:         See the viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4340:         Entry (i,j) is the size of the message (in bytes) that rank i sends to rank j in one `MatMult()` call.

4342:    Example usage:

4344:    Consider the following 8x8 matrix with 34 non-zero values, that is
4345:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4346:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4347:    as follows

4349: .vb
4350:             1  2  0  |  0  3  0  |  0  4
4351:     Proc0   0  5  6  |  7  0  0  |  8  0
4352:             9  0 10  | 11  0  0  | 12  0
4353:     -------------------------------------
4354:            13  0 14  | 15 16 17  |  0  0
4355:     Proc1   0 18  0  | 19 20 21  |  0  0
4356:             0  0  0  | 22 23  0  | 24  0
4357:     -------------------------------------
4358:     Proc2  25 26 27  |  0  0 28  | 29  0
4359:            30  0  0  | 31 32 33  |  0 34
4360: .ve

4362:    This can be represented as a collection of submatrices as

4364: .vb
4365:       A B C
4366:       D E F
4367:       G H I
4368: .ve

4370:    Where the submatrices A,B,C are owned by proc0, D,E,F are
4371:    owned by proc1, G,H,I are owned by proc2.

4373:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4374:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4375:    The 'M','N' parameters are 8,8, and have the same values on all procs.

4377:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4378:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4379:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4380:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4381:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4382:    matrix, and [DF] as another SeqAIJ matrix.

4384:    When d_nz, o_nz parameters are specified, d_nz storage elements are
4385:    allocated for every row of the local diagonal submatrix, and o_nz
4386:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4387:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4388:    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4389:    In this case, the values of d_nz,o_nz are
4390: .vb
4391:      proc0 : d_nz = 2, o_nz = 2
4392:      proc1 : d_nz = 3, o_nz = 2
4393:      proc2 : d_nz = 1, o_nz = 4
4394: .ve
4395:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4396:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4397:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4398:    34 values.

4400:    When d_nnz, o_nnz parameters are specified, the storage is specified
4401:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4402:    In the above case the values for d_nnz,o_nnz are
4403: .vb
4404:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4405:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4406:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4407: .ve
4408:    Here the space allocated is the sum of all the above values, i.e. 34, and
4409:    hence the preallocation is perfect.
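
   A minimal sketch of the corresponding call on the rank that owns the first three rows of the
   8x8 example above (the remaining ranks pass their own m, n, d_nnz, and o_nnz) could be
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2};  /* the proc0 counts from the example */

     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve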

4411:    Level: intermediate

4413: .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4414:           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4415: @*/
4416: PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4417: {
4418:   PetscMPIInt size;

4420:   MatCreate(comm, A);
4421:   MatSetSizes(*A, m, n, M, N);
4422:   MPI_Comm_size(comm, &size);
4423:   if (size > 1) {
4424:     MatSetType(*A, MATMPIAIJ);
4425:     MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz);
4426:   } else {
4427:     MatSetType(*A, MATSEQAIJ);
4428:     MatSeqAIJSetPreallocation(*A, d_nz, d_nnz);
4429:   }
4430:   return 0;
4431: }

4433: /*@C
4434:   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

4436:   Not collective

4438:   Input Parameter:
4439: . A - The `MATMPIAIJ` matrix

4441:   Output Parameters:
4442: + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4443: . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4444: - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

4446:   Note:
4447:   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4448:   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4449:   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4450:   local column numbers to global column numbers in the original matrix.
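
  Example usage, a minimal sketch assuming A is an assembled `MATMPIAIJ` matrix:
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;
     PetscInt        nco, j;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     MatGetSize(Ao, NULL, &nco);
     for (j = 0; j < nco; j++) {
       /* colmap[j] is the global column number of local column j of Ao */
     }
.ve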

4452:   Level: intermediate

4454: .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4455: @*/
4456: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4457: {
4458:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4459:   PetscBool   flg;

4461:   PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg);
4463:   if (Ad) *Ad = a->A;
4464:   if (Ao) *Ao = a->B;
4465:   if (colmap) *colmap = a->garray;
4466:   return 0;
4467: }

4469: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4470: {
4471:   PetscInt     m, N, i, rstart, nnz, Ii;
4472:   PetscInt    *indx;
4473:   PetscScalar *values;
4474:   MatType      rootType;

4476:   MatGetSize(inmat, &m, &N);
4477:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4478:     PetscInt *dnz, *onz, sum, bs, cbs;

4480:     if (n == PETSC_DECIDE) PetscSplitOwnership(comm, &n, &N);
4481:     /* Check sum(n) = N */
4482:     MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm);

4485:     MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm);
4486:     rstart -= m;

4488:     MatPreallocateBegin(comm, m, n, dnz, onz);
4489:     for (i = 0; i < m; i++) {
4490:       MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL);
4491:       MatPreallocateSet(i + rstart, nnz, indx, dnz, onz);
4492:       MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL);
4493:     }

4495:     MatCreate(comm, outmat);
4496:     MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
4497:     MatGetBlockSizes(inmat, &bs, &cbs);
4498:     MatSetBlockSizes(*outmat, bs, cbs);
4499:     MatGetRootType_Private(inmat, &rootType);
4500:     MatSetType(*outmat, rootType);
4501:     MatSeqAIJSetPreallocation(*outmat, 0, dnz);
4502:     MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz);
4503:     MatPreallocateEnd(dnz, onz);
4504:     MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
4505:   }

4507:   /* numeric phase */
4508:   MatGetOwnershipRange(*outmat, &rstart, NULL);
4509:   for (i = 0; i < m; i++) {
4510:     MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values);
4511:     Ii = i + rstart;
4512:     MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES);
4513:     MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values);
4514:   }
4515:   MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY);
4516:   MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY);
4517:   return 0;
4518: }

4520: PetscErrorCode MatFileSplit(Mat A, char *outfile)
4521: {
4522:   PetscMPIInt        rank;
4523:   PetscInt           m, N, i, rstart, nnz;
4524:   size_t             len;
4525:   const PetscInt    *indx;
4526:   PetscViewer        out;
4527:   char              *name;
4528:   Mat                B;
4529:   const PetscScalar *values;

4531:   MatGetLocalSize(A, &m, NULL);
4532:   MatGetSize(A, NULL, &N);
4533:   /* Should this be the type of the diagonal block of A? */
4534:   MatCreate(PETSC_COMM_SELF, &B);
4535:   MatSetSizes(B, m, N, m, N);
4536:   MatSetBlockSizesFromMats(B, A, A);
4537:   MatSetType(B, MATSEQAIJ);
4538:   MatSeqAIJSetPreallocation(B, 0, NULL);
4539:   MatGetOwnershipRange(A, &rstart, NULL);
4540:   for (i = 0; i < m; i++) {
4541:     MatGetRow(A, i + rstart, &nnz, &indx, &values);
4542:     MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES);
4543:     MatRestoreRow(A, i + rstart, &nnz, &indx, &values);
4544:   }
4545:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
4546:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

4548:   MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank);
4549:   PetscStrlen(outfile, &len);
4550:   PetscMalloc1(len + 6, &name);
4551:   PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank);
4552:   PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out);
4553:   PetscFree(name);
4554:   MatView(B, out);
4555:   PetscViewerDestroy(&out);
4556:   MatDestroy(&B);
4557:   return 0;
4558: }

4560: static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4561: {
4562:   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

4564:   if (!merge) return 0;
4565:   PetscFree(merge->id_r);
4566:   PetscFree(merge->len_s);
4567:   PetscFree(merge->len_r);
4568:   PetscFree(merge->bi);
4569:   PetscFree(merge->bj);
4570:   PetscFree(merge->buf_ri[0]);
4571:   PetscFree(merge->buf_ri);
4572:   PetscFree(merge->buf_rj[0]);
4573:   PetscFree(merge->buf_rj);
4574:   PetscFree(merge->coi);
4575:   PetscFree(merge->coj);
4576:   PetscFree(merge->owners_co);
4577:   PetscLayoutDestroy(&merge->rowmap);
4578:   PetscFree(merge);
4579:   return 0;
4580: }

4582: #include <../src/mat/utils/freespace.h>
4583: #include <petscbt.h>

4585: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4586: {
4587:   MPI_Comm             comm;
4588:   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4589:   PetscMPIInt          size, rank, taga, *len_s;
4590:   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4591:   PetscInt             proc, m;
4592:   PetscInt           **buf_ri, **buf_rj;
4593:   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4594:   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4595:   MPI_Request         *s_waits, *r_waits;
4596:   MPI_Status          *status;
4597:   const MatScalar     *aa, *a_a;
4598:   MatScalar          **abuf_r, *ba_i;
4599:   Mat_Merge_SeqsToMPI *merge;
4600:   PetscContainer       container;

4602:   PetscObjectGetComm((PetscObject)mpimat, &comm);
4603:   PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0);

4605:   MPI_Comm_size(comm, &size);
4606:   MPI_Comm_rank(comm, &rank);

4608:   PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container);
4610:   PetscContainerGetPointer(container, (void **)&merge);
4611:   MatSeqAIJGetArrayRead(seqmat, &a_a);
4612:   aa = a_a;

4614:   bi     = merge->bi;
4615:   bj     = merge->bj;
4616:   buf_ri = merge->buf_ri;
4617:   buf_rj = merge->buf_rj;

4619:   PetscMalloc1(size, &status);
4620:   owners = merge->rowmap->range;
4621:   len_s  = merge->len_s;

4623:   /* send and recv matrix values */
4624:   /*-----------------------------*/
4625:   PetscObjectGetNewTag((PetscObject)mpimat, &taga);
4626:   PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits);

4628:   PetscMalloc1(merge->nsend + 1, &s_waits);
4629:   for (proc = 0, k = 0; proc < size; proc++) {
4630:     if (!len_s[proc]) continue;
4631:     i = owners[proc];
4632:     MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k);
4633:     k++;
4634:   }

4636:   if (merge->nrecv) MPI_Waitall(merge->nrecv, r_waits, status);
4637:   if (merge->nsend) MPI_Waitall(merge->nsend, s_waits, status);
4638:   PetscFree(status);

4640:   PetscFree(s_waits);
4641:   PetscFree(r_waits);

4643:   /* insert mat values of mpimat */
4644:   /*----------------------------*/
4645:   PetscMalloc1(N, &ba_i);
4646:   PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai);

4648:   for (k = 0; k < merge->nrecv; k++) {
4649:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4650:     nrows       = *(buf_ri_k[k]);
4651:     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4652:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4653:   }

4655:   /* set values of ba */
4656:   m = merge->rowmap->n;
4657:   for (i = 0; i < m; i++) {
4658:     arow = owners[rank] + i;
4659:     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4660:     bnzi = bi[i + 1] - bi[i];
4661:     PetscArrayzero(ba_i, bnzi);

4663:     /* add local non-zero vals of this proc's seqmat into ba */
4664:     anzi   = ai[arow + 1] - ai[arow];
4665:     aj     = a->j + ai[arow];
4666:     aa     = a_a + ai[arow];
4667:     nextaj = 0;
4668:     for (j = 0; nextaj < anzi; j++) {
4669:       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4670:         ba_i[j] += aa[nextaj++];
4671:       }
4672:     }

4674:     /* add received vals into ba */
4675:     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4676:       /* i-th row */
4677:       if (i == *nextrow[k]) {
4678:         anzi   = *(nextai[k] + 1) - *nextai[k];
4679:         aj     = buf_rj[k] + *(nextai[k]);
4680:         aa     = abuf_r[k] + *(nextai[k]);
4681:         nextaj = 0;
4682:         for (j = 0; nextaj < anzi; j++) {
4683:           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4684:             ba_i[j] += aa[nextaj++];
4685:           }
4686:         }
4687:         nextrow[k]++;
4688:         nextai[k]++;
4689:       }
4690:     }
4691:     MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES);
4692:   }
4693:   MatSeqAIJRestoreArrayRead(seqmat, &a_a);
4694:   MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY);
4695:   MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY);

4697:   PetscFree(abuf_r[0]);
4698:   PetscFree(abuf_r);
4699:   PetscFree(ba_i);
4700:   PetscFree3(buf_ri_k, nextrow, nextai);
4701:   PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0);
4702:   return 0;
4703: }

4705: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4706: {
4707:   Mat                  B_mpi;
4708:   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4709:   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4710:   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4711:   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4712:   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4713:   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4714:   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4715:   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4716:   MPI_Status          *status;
4717:   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4718:   PetscBT              lnkbt;
4719:   Mat_Merge_SeqsToMPI *merge;
4720:   PetscContainer       container;

4722:   PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0);

4724:   /* make sure it is a PETSc comm */
4725:   PetscCommDuplicate(comm, &comm, NULL);
4726:   MPI_Comm_size(comm, &size);
4727:   MPI_Comm_rank(comm, &rank);

4729:   PetscNew(&merge);
4730:   PetscMalloc1(size, &status);

4732:   /* determine row ownership */
4733:   /*---------------------------------------------------------*/
4734:   PetscLayoutCreate(comm, &merge->rowmap);
4735:   PetscLayoutSetLocalSize(merge->rowmap, m);
4736:   PetscLayoutSetSize(merge->rowmap, M);
4737:   PetscLayoutSetBlockSize(merge->rowmap, 1);
4738:   PetscLayoutSetUp(merge->rowmap);
4739:   PetscMalloc1(size, &len_si);
4740:   PetscMalloc1(size, &merge->len_s);

4742:   m      = merge->rowmap->n;
4743:   owners = merge->rowmap->range;

4745:   /* determine the number of messages to send, their lengths */
4746:   /*---------------------------------------------------------*/
4747:   len_s = merge->len_s;

4749:   len          = 0; /* length of buf_si[] */
4750:   merge->nsend = 0;
4751:   for (proc = 0; proc < size; proc++) {
4752:     len_si[proc] = 0;
4753:     if (proc == rank) {
4754:       len_s[proc] = 0;
4755:     } else {
4756:       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4757:       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4758:     }
4759:     if (len_s[proc]) {
4760:       merge->nsend++;
4761:       nrows = 0;
4762:       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4763:         if (ai[i + 1] > ai[i]) nrows++;
4764:       }
4765:       len_si[proc] = 2 * (nrows + 1);
4766:       len += len_si[proc];
4767:     }
4768:   }

4770:   /* determine the number and length of messages to receive for ij-structure */
4771:   /*-------------------------------------------------------------------------*/
4772:   PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv);
4773:   PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri);

4775:   /* post the Irecv of j-structure */
4776:   /*-------------------------------*/
4777:   PetscCommGetNewTag(comm, &tagj);
4778:   PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits);

4780:   /* post the Isend of j-structure */
4781:   /*--------------------------------*/
4782:   PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits);

4784:   for (proc = 0, k = 0; proc < size; proc++) {
4785:     if (!len_s[proc]) continue;
4786:     i = owners[proc];
4787:     MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k);
4788:     k++;
4789:   }

4791:   /* receives and sends of j-structure are complete */
4792:   /*------------------------------------------------*/
4793:   if (merge->nrecv) MPI_Waitall(merge->nrecv, rj_waits, status);
4794:   if (merge->nsend) MPI_Waitall(merge->nsend, sj_waits, status);

4796:   /* send and recv i-structure */
4797:   /*---------------------------*/
4798:   PetscCommGetNewTag(comm, &tagi);
4799:   PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits);

4801:   PetscMalloc1(len + 1, &buf_s);
4802:   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4803:   for (proc = 0, k = 0; proc < size; proc++) {
4804:     if (!len_s[proc]) continue;
4805:     /* form outgoing message for i-structure:
4806:          buf_si[0]:                 nrows to be sent
4807:                [1:nrows]:           row index (global)
4808:                [nrows+1:2*nrows+1]: i-structure index
4809:     */
4810:     /*-------------------------------------------*/
4811:     nrows       = len_si[proc] / 2 - 1;
4812:     buf_si_i    = buf_si + nrows + 1;
4813:     buf_si[0]   = nrows;
4814:     buf_si_i[0] = 0;
4815:     nrows       = 0;
4816:     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4817:       anzi = ai[i + 1] - ai[i];
4818:       if (anzi) {
4819:         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4820:         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4821:         nrows++;
4822:       }
4823:     }
4824:     MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k);
4825:     k++;
4826:     buf_si += len_si[proc];
4827:   }

4829:   if (merge->nrecv) MPI_Waitall(merge->nrecv, ri_waits, status);
4830:   if (merge->nsend) MPI_Waitall(merge->nsend, si_waits, status);

4832:   PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv);
4833:   for (i = 0; i < merge->nrecv; i++) PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]);

4835:   PetscFree(len_si);
4836:   PetscFree(len_ri);
4837:   PetscFree(rj_waits);
4838:   PetscFree2(si_waits, sj_waits);
4839:   PetscFree(ri_waits);
4840:   PetscFree(buf_s);
4841:   PetscFree(status);

4843:   /* compute a local seq matrix in each processor */
4844:   /*----------------------------------------------*/
4845:   /* allocate bi array and free space for accumulating nonzero column info */
4846:   PetscMalloc1(m + 1, &bi);
4847:   bi[0] = 0;

4849:   /* create and initialize a linked list */
4850:   nlnk = N + 1;
4851:   PetscLLCreate(N, N, nlnk, lnk, lnkbt);

4853:   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4854:   len = ai[owners[rank + 1]] - ai[owners[rank]];
4855:   PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space);

4857:   current_space = free_space;

4859:   /* determine symbolic info for each local row */
4860:   PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai);

4862:   for (k = 0; k < merge->nrecv; k++) {
4863:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4864:     nrows       = *buf_ri_k[k];
4865:     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4866:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4867:   }

4869:   MatPreallocateBegin(comm, m, n, dnz, onz);
4870:   len = 0;
4871:   for (i = 0; i < m; i++) {
4872:     bnzi = 0;
4873:     /* add local non-zero cols of this proc's seqmat into lnk */
4874:     arow = owners[rank] + i;
4875:     anzi = ai[arow + 1] - ai[arow];
4876:     aj   = a->j + ai[arow];
4877:     PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt);
4878:     bnzi += nlnk;
4879:     /* add received col data into lnk */
4880:     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4881:       if (i == *nextrow[k]) {            /* i-th row */
4882:         anzi = *(nextai[k] + 1) - *nextai[k];
4883:         aj   = buf_rj[k] + *nextai[k];
4884:         PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt);
4885:         bnzi += nlnk;
4886:         nextrow[k]++;
4887:         nextai[k]++;
4888:       }
4889:     }
4890:     if (len < bnzi) len = bnzi; /* =max(bnzi) */

4892:     /* if free space is not available, make more free space */
4893:     if (current_space->local_remaining < bnzi) PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space);
4894:     /* copy data into free space, then initialize lnk */
4895:     PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt);
4896:     MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz);

4898:     current_space->array += bnzi;
4899:     current_space->local_used += bnzi;
4900:     current_space->local_remaining -= bnzi;

4902:     bi[i + 1] = bi[i] + bnzi;
4903:   }

4905:   PetscFree3(buf_ri_k, nextrow, nextai);

4907:   PetscMalloc1(bi[m] + 1, &bj);
4908:   PetscFreeSpaceContiguous(&free_space, bj);
4909:   PetscLLDestroy(lnk, lnkbt);

4911:   /* create symbolic parallel matrix B_mpi */
4912:   /*---------------------------------------*/
4913:   MatGetBlockSizes(seqmat, &bs, &cbs);
4914:   MatCreate(comm, &B_mpi);
4915:   if (n == PETSC_DECIDE) {
4916:     MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N);
4917:   } else {
4918:     MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
4919:   }
4920:   MatSetBlockSizes(B_mpi, bs, cbs);
4921:   MatSetType(B_mpi, MATMPIAIJ);
4922:   MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz);
4923:   MatPreallocateEnd(dnz, onz);
4924:   MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);

4926:   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4927:   B_mpi->assembled = PETSC_FALSE;
4928:   merge->bi        = bi;
4929:   merge->bj        = bj;
4930:   merge->buf_ri    = buf_ri;
4931:   merge->buf_rj    = buf_rj;
4932:   merge->coi       = NULL;
4933:   merge->coj       = NULL;
4934:   merge->owners_co = NULL;

4936:   PetscCommDestroy(&comm);

4938:   /* attach the supporting struct to B_mpi for reuse */
4939:   PetscContainerCreate(PETSC_COMM_SELF, &container);
4940:   PetscContainerSetPointer(container, merge);
4941:   PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI);
4942:   PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container);
4943:   PetscContainerDestroy(&container);
4944:   *mpimat = B_mpi;

4946:   PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0);
4947:   return 0;
4948: }

4950: /*@C
4951:       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
4952:                  matrices from each processor

4954:     Collective

4956:    Input Parameters:
4957: +    comm - the communicator the parallel matrix will live on
4958: .    seqmat - the input sequential matrix
4959: .    m - number of local rows (or `PETSC_DECIDE`)
4960: .    n - number of local columns (or `PETSC_DECIDE`)
4961: -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

4963:    Output Parameter:
4964: .    mpimat - the parallel matrix generated

4966:     Level: advanced

4968:    Note:
4969:      The dimensions of the sequential matrix in each processor MUST be the same.
4970:      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4971:      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
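
   Example usage, a minimal sketch; each rank builds its own sequential matrix seqmat with
   identical global dimensions:
.vb
     Mat seqmat, mpimat;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat);
     /* after changing the values (same pattern) in seqmat, re-sum into the existing parallel matrix */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat);
.ve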
4972: @*/
4973: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
4974: {
4975:   PetscMPIInt size;

4977:   MPI_Comm_size(comm, &size);
4978:   if (size == 1) {
4979:     PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0);
4980:     if (scall == MAT_INITIAL_MATRIX) {
4981:       MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat);
4982:     } else {
4983:       MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN);
4984:     }
4985:     PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0);
4986:     return 0;
4987:   }
4988:   PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0);
4989:   if (scall == MAT_INITIAL_MATRIX) MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat);
4990:   MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat);
4991:   PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0);
4992:   return 0;
4993: }

4995: /*@
4996:      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
4997:           mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
4998:           with `MatGetSize()`

5000:     Not Collective

5002:    Input Parameter:
5003: .    A - the matrix

5006:    Output Parameter:
5007: .    A_loc - the local sequential matrix generated

5009:     Level: developer

5011:    Notes:
5012:      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

5014:      Destroy the matrix with `MatDestroy()`
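
   Example usage, a minimal sketch assuming A is an assembled `MATAIJ` matrix:
.vb
     Mat A_loc;

     MatAIJGetLocalMat(A, &A_loc);
     /* ... work with the local rows of A as a sequential matrix ... */
     MatDestroy(&A_loc);
.ve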

5016: .seealso: `MatMPIAIJGetLocalMat()`
5017: @*/
5018: PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5019: {
5020:   PetscBool mpi;

5022:   PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi);
5023:   if (mpi) {
5024:     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc);
5025:   } else {
5026:     *A_loc = A;
5027:     PetscObjectReference((PetscObject)*A_loc);
5028:   }
5029:   return 0;
5030: }

5032: /*@
5033:      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5034:           mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5035:           with `MatGetSize()`

5037:     Not Collective

5039:    Input Parameters:
5040: +    A - the matrix
5041: -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

5043:    Output Parameter:
5044: .    A_loc - the local sequential matrix generated

5046:     Level: developer

5048:    Notes:
5049:      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

5051:      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5052:      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5053:      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5054:      modify the values of the returned A_loc.
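
   Example usage, a minimal sketch assuming A is an assembled `MATMPIAIJ` matrix:
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
     /* ... after the numerical values of A change (same nonzero pattern) ... */
     MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
     MatDestroy(&A_loc);
.ve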

5056: .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5057: @*/
5058: PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5059: {
5060:   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5061:   Mat_SeqAIJ        *mat, *a, *b;
5062:   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5063:   const PetscScalar *aa, *ba, *aav, *bav;
5064:   PetscScalar       *ca, *cam;
5065:   PetscMPIInt        size;
5066:   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5067:   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5068:   PetscBool          match;

5070:   PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match);
5072:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
5073:   if (size == 1) {
5074:     if (scall == MAT_INITIAL_MATRIX) {
5075:       PetscObjectReference((PetscObject)mpimat->A);
5076:       *A_loc = mpimat->A;
5077:     } else if (scall == MAT_REUSE_MATRIX) {
5078:       MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN);
5079:     }
5080:     return 0;
5081:   }

5083:   PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0);
5084:   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5085:   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5086:   ai = a->i;
5087:   aj = a->j;
5088:   bi = b->i;
5089:   bj = b->j;
5090:   MatSeqAIJGetArrayRead(mpimat->A, &aav);
5091:   MatSeqAIJGetArrayRead(mpimat->B, &bav);
5092:   aa = aav;
5093:   ba = bav;
5094:   if (scall == MAT_INITIAL_MATRIX) {
5095:     PetscMalloc1(1 + am, &ci);
5096:     ci[0] = 0;
5097:     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5098:     PetscMalloc1(1 + ci[am], &cj);
5099:     PetscMalloc1(1 + ci[am], &ca);
5100:     k = 0;
5101:     for (i = 0; i < am; i++) {
5102:       ncols_o = bi[i + 1] - bi[i];
5103:       ncols_d = ai[i + 1] - ai[i];
5104:       /* off-diagonal portion of A */
5105:       for (jo = 0; jo < ncols_o; jo++) {
5106:         col = cmap[*bj];
5107:         if (col >= cstart) break;
5108:         cj[k] = col;
5109:         bj++;
5110:         ca[k++] = *ba++;
5111:       }
5112:       /* diagonal portion of A */
5113:       for (j = 0; j < ncols_d; j++) {
5114:         cj[k]   = cstart + *aj++;
5115:         ca[k++] = *aa++;
5116:       }
5117:       /* off-diagonal portion of A */
5118:       for (j = jo; j < ncols_o; j++) {
5119:         cj[k]   = cmap[*bj++];
5120:         ca[k++] = *ba++;
5121:       }
5122:     }
5123:     /* put together the new matrix */
5124:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc);
5125:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5126:     /* Since these are PETSc arrays, change flags to free them as necessary. */
5127:     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5128:     mat->free_a  = PETSC_TRUE;
5129:     mat->free_ij = PETSC_TRUE;
5130:     mat->nonew   = 0;
5131:   } else if (scall == MAT_REUSE_MATRIX) {
5132:     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5133:     ci  = mat->i;
5134:     cj  = mat->j;
5135:     MatSeqAIJGetArrayWrite(*A_loc, &cam);
5136:     for (i = 0; i < am; i++) {
5137:       /* off-diagonal portion of A */
5138:       ncols_o = bi[i + 1] - bi[i];
5139:       for (jo = 0; jo < ncols_o; jo++) {
5140:         col = cmap[*bj];
5141:         if (col >= cstart) break;
5142:         *cam++ = *ba++;
5143:         bj++;
5144:       }
5145:       /* diagonal portion of A */
5146:       ncols_d = ai[i + 1] - ai[i];
5147:       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5148:       /* off-diagonal portion of A */
5149:       for (j = jo; j < ncols_o; j++) {
5150:         *cam++ = *ba++;
5151:         bj++;
5152:       }
5153:     }
5154:     MatSeqAIJRestoreArrayWrite(*A_loc, &cam);
5155:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5156:   MatSeqAIJRestoreArrayRead(mpimat->A, &aav);
5157:   MatSeqAIJRestoreArrayRead(mpimat->B, &bav);
5158:   PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0);
5159:   return 0;
5160: }

5162: /*@
5163:      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5164:           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

5166:     Not Collective

5168:    Input Parameters:
5169: +    A - the matrix
5170: -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

5172:    Output Parameters:
5173: +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5174: -    A_loc - the local sequential matrix generated

5176:     Level: developer

5178:    Note:
5179:      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
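
   Example usage, a minimal sketch assuming A is an assembled `MATMPIAIJ` matrix:
.vb
     Mat A_loc;
     IS  glob;

     MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc);
     /* column j of A_loc corresponds to the global column glob[j] of A */
     ISDestroy(&glob);
     MatDestroy(&A_loc);
.ve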

5181: .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5182: @*/
5183: PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5184: {
5185:   Mat             Ao, Ad;
5186:   const PetscInt *cmap;
5187:   PetscMPIInt     size;
5188:   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);

5190:   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap);
5191:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
5192:   if (size == 1) {
5193:     if (scall == MAT_INITIAL_MATRIX) {
5194:       PetscObjectReference((PetscObject)Ad);
5195:       *A_loc = Ad;
5196:     } else if (scall == MAT_REUSE_MATRIX) {
5197:       MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN);
5198:     }
5199:     if (glob) ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob);
5200:     return 0;
5201:   }
5202:   PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f);
5203:   PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0);
5204:   if (f) {
5205:     (*f)(A, scall, glob, A_loc);
5206:   } else {
5207:     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5208:     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5209:     Mat_SeqAIJ        *c;
5210:     PetscInt          *ai = a->i, *aj = a->j;
5211:     PetscInt          *bi = b->i, *bj = b->j;
5212:     PetscInt          *ci, *cj;
5213:     const PetscScalar *aa, *ba;
5214:     PetscScalar       *ca;
5215:     PetscInt           i, j, am, dn, on;

5217:     MatGetLocalSize(Ad, &am, &dn);
5218:     MatGetLocalSize(Ao, NULL, &on);
5219:     MatSeqAIJGetArrayRead(Ad, &aa);
5220:     MatSeqAIJGetArrayRead(Ao, &ba);
5221:     if (scall == MAT_INITIAL_MATRIX) {
5222:       PetscInt k;
5223:       PetscMalloc1(1 + am, &ci);
5224:       PetscMalloc1(ai[am] + bi[am], &cj);
5225:       PetscMalloc1(ai[am] + bi[am], &ca);
5226:       ci[0] = 0;
5227:       for (i = 0, k = 0; i < am; i++) {
5228:         const PetscInt ncols_o = bi[i + 1] - bi[i];
5229:         const PetscInt ncols_d = ai[i + 1] - ai[i];
5230:         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5231:         /* diagonal portion of A */
5232:         for (j = 0; j < ncols_d; j++, k++) {
5233:           cj[k] = *aj++;
5234:           ca[k] = *aa++;
5235:         }
5236:         /* off-diagonal portion of A */
5237:         for (j = 0; j < ncols_o; j++, k++) {
5238:           cj[k] = dn + *bj++;
5239:           ca[k] = *ba++;
5240:         }
5241:       }
5242:       /* put together the new matrix */
5243:       MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc);
5244:       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5245:       /* Since these are PETSc arrays, change flags to free them as necessary. */
5246:       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5247:       c->free_a  = PETSC_TRUE;
5248:       c->free_ij = PETSC_TRUE;
5249:       c->nonew   = 0;
5250:       MatSetType(*A_loc, ((PetscObject)Ad)->type_name);
5251:     } else if (scall == MAT_REUSE_MATRIX) {
5252:       MatSeqAIJGetArrayWrite(*A_loc, &ca);
5253:       for (i = 0; i < am; i++) {
5254:         const PetscInt ncols_d = ai[i + 1] - ai[i];
5255:         const PetscInt ncols_o = bi[i + 1] - bi[i];
5256:         /* diagonal portion of A */
5257:         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5258:         /* off-diagonal portion of A */
5259:         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5260:       }
5261:       MatSeqAIJRestoreArrayWrite(*A_loc, &ca);
5262:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5263:     MatSeqAIJRestoreArrayRead(Ad, &aa);
5264:     MatSeqAIJRestoreArrayRead(Ao, &ba);
5265:     if (glob) {
5266:       PetscInt cst, *gidx;

5268:       MatGetOwnershipRangeColumn(A, &cst, NULL);
5269:       PetscMalloc1(dn + on, &gidx);
5270:       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5271:       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5272:       ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob);
5273:     }
5274:   }
5275:   PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0);
5276:   return 0;
5277: }

5279: /*@C
5280:      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns

5282:     Not Collective

5284:    Input Parameters:
5285: +    A - the matrix
5286: .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5287: -    row, col - index sets of rows and columns to extract (or NULL)

5289:    Output Parameter:
5290: .    A_loc - the local sequential matrix generated

5292:     Level: developer
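
   Example usage, a minimal sketch assuming A is an assembled `MATMPIAIJ` matrix:
.vb
     Mat A_loc;

     /* all local rows, restricted to the nonzero columns of this process's part of A */
     MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc);
     MatDestroy(&A_loc);
.ve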

5294: .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5295: @*/
5296: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5297: {
5298:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5299:   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5300:   IS          isrowa, iscola;
5301:   Mat        *aloc;
5302:   PetscBool   match;

5304:   PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match);
5306:   PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0);
5307:   if (!row) {
5308:     start = A->rmap->rstart;
5309:     end   = A->rmap->rend;
5310:     ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa);
5311:   } else {
5312:     isrowa = *row;
5313:   }
5314:   if (!col) {
5315:     start = A->cmap->rstart;
5316:     cmap  = a->garray;
5317:     nzA   = a->A->cmap->n;
5318:     nzB   = a->B->cmap->n;
5319:     PetscMalloc1(nzA + nzB, &idx);
5320:     ncols = 0;
5321:     for (i = 0; i < nzB; i++) {
5322:       if (cmap[i] < start) idx[ncols++] = cmap[i];
5323:       else break;
5324:     }
5325:     imark = i;
5326:     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5327:     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5328:     ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola);
5329:   } else {
5330:     iscola = *col;
5331:   }
5332:   if (scall != MAT_INITIAL_MATRIX) {
5333:     PetscMalloc1(1, &aloc);
5334:     aloc[0] = *A_loc;
5335:   }
5336:   MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc);
5337:   if (!col) { /* attach global id of condensed columns */
5338:     PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola);
5339:   }
5340:   *A_loc = aloc[0];
5341:   PetscFree(aloc);
5342:   if (!row) ISDestroy(&isrowa);
5343:   if (!col) ISDestroy(&iscola);
5344:   PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0);
5345:   return 0;
5346: }

5348: /*
5349:  * Create a sequential AIJ matrix based on row indices: all the columns of a row are extracted once the row is matched.
5350:  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5351:  * on a global size.
5352:  * */
5353: PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5354: {
5355:   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5356:   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5357:   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5358:   PetscMPIInt            owner;
5359:   PetscSFNode           *iremote, *oiremote;
5360:   const PetscInt        *lrowindices;
5361:   PetscSF                sf, osf;
5362:   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5363:   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5364:   MPI_Comm               comm;
5365:   ISLocalToGlobalMapping mapping;
5366:   const PetscScalar     *pd_a, *po_a;

5368:   PetscObjectGetComm((PetscObject)P, &comm);
5369:   /* plocalsize is the number of roots
5370:    * nrows is the number of leaves
5371:    * */
5372:   MatGetLocalSize(P, &plocalsize, NULL);
5373:   ISGetLocalSize(rows, &nrows);
5374:   PetscCalloc1(nrows, &iremote);
5375:   ISGetIndices(rows, &lrowindices);
5376:   for (i = 0; i < nrows; i++) {
5377:     /* Find a remote index and an owner for a row
5378:      * The row could be local or remote
5379:      * */
5380:     owner = 0;
5381:     lidx  = 0;
5382:     PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx);
5383:     iremote[i].index = lidx;
5384:     iremote[i].rank  = owner;
5385:   }
5386:   /* Create SF to communicate how many nonzero columns for each row */
5387:   PetscSFCreate(comm, &sf);
5388:   /* SF will figure out the number of nonzero columns for each row, and their
5389:    * offsets
5390:    * */
5391:   PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
5392:   PetscSFSetFromOptions(sf);
5393:   PetscSFSetUp(sf);

5395:   PetscCalloc1(2 * (plocalsize + 1), &roffsets);
5396:   PetscCalloc1(2 * plocalsize, &nrcols);
5397:   PetscCalloc1(nrows, &pnnz);
5398:   roffsets[0] = 0;
5399:   roffsets[1] = 0;
5400:   for (i = 0; i < plocalsize; i++) {
5401:     /* diag */
5402:     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5403:     /* off diag */
5404:     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5405:     /* compute offsets so that we know the relative location for each row */
5406:     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5407:     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5408:   }
5409:   PetscCalloc1(2 * nrows, &nlcols);
5410:   PetscCalloc1(2 * nrows, &loffsets);
5411:   /* 'r' means root, and 'l' means leaf */
5412:   PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE);
5413:   PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE);
5414:   PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE);
5415:   PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE);
5416:   PetscSFDestroy(&sf);
5417:   PetscFree(roffsets);
5418:   PetscFree(nrcols);
5419:   dntotalcols = 0;
5420:   ontotalcols = 0;
5421:   ncol        = 0;
5422:   for (i = 0; i < nrows; i++) {
5423:     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5424:     ncol    = PetscMax(pnnz[i], ncol);
5425:     /* diag */
5426:     dntotalcols += nlcols[i * 2 + 0];
5427:     /* off diag */
5428:     ontotalcols += nlcols[i * 2 + 1];
5429:   }
5430:   /* We do not need to figure out the right number of columns
5431:    * since all the calculations will be done by going through the raw data
5432:    * */
5433:   MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth);
5434:   MatSetUp(*P_oth);
5435:   PetscFree(pnnz);
5436:   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5437:   /* diag */
5438:   PetscCalloc1(dntotalcols, &iremote);
5439:   /* off diag */
5440:   PetscCalloc1(ontotalcols, &oiremote);
5441:   /* diag */
5442:   PetscCalloc1(dntotalcols, &ilocal);
5443:   /* off diag */
5444:   PetscCalloc1(ontotalcols, &oilocal);
5445:   dntotalcols = 0;
5446:   ontotalcols = 0;
5447:   ntotalcols  = 0;
5448:   for (i = 0; i < nrows; i++) {
5449:     owner = 0;
5450:     PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL);
5451:     /* Set iremote for diag matrix */
5452:     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5453:       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5454:       iremote[dntotalcols].rank  = owner;
5455:       /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
5456:       ilocal[dntotalcols++] = ntotalcols++;
5457:     }
5458:     /* off diag */
5459:     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5460:       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5461:       oiremote[ontotalcols].rank  = owner;
5462:       oilocal[ontotalcols++]      = ntotalcols++;
5463:     }
5464:   }
5465:   ISRestoreIndices(rows, &lrowindices);
5466:   PetscFree(loffsets);
5467:   PetscFree(nlcols);
5468:   PetscSFCreate(comm, &sf);
5469:   /* P serves as roots and P_oth is leaves
5470:    * Diag matrix
5471:    * */
5472:   PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
5473:   PetscSFSetFromOptions(sf);
5474:   PetscSFSetUp(sf);

5476:   PetscSFCreate(comm, &osf);
5477:   /* Off diag */
5478:   PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER);
5479:   PetscSFSetFromOptions(osf);
5480:   PetscSFSetUp(osf);
5481:   MatSeqAIJGetArrayRead(p->A, &pd_a);
5482:   MatSeqAIJGetArrayRead(p->B, &po_a);
5483:   /* We operate on the matrix internal data to save memory */
5484:   PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE);
5485:   PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE);
5486:   MatGetOwnershipRangeColumn(P, &pcstart, NULL);
5487:   /* Convert to global indices for diag matrix */
5488:   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5489:   PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE);
5490:   /* We want P_oth to store global indices */
5491:   ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping);
5492:   /* Use memory scalable approach */
5493:   ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH);
5494:   ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j);
5495:   PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE);
5496:   PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE);
5497:   /* Convert back to local indices */
5498:   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5499:   PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE);
5500:   nout = 0;
5501:   ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j);
5503:   ISLocalToGlobalMappingDestroy(&mapping);
5504:   /* Exchange values */
5505:   PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE);
5506:   PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE);
5507:   MatSeqAIJRestoreArrayRead(p->A, &pd_a);
5508:   MatSeqAIJRestoreArrayRead(p->B, &po_a);
5509:   /* Stop PETSc from shrinking memory */
5510:   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5511:   MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY);
5512:   MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY);
5513:   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5514:   PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf);
5515:   PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf);
5516:   PetscSFDestroy(&sf);
5517:   PetscSFDestroy(&osf);
5518:   return 0;
5519: }

5521: /*
5522:  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5523:  * This supports MPIAIJ and MAIJ
5524:  * */
5525: PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5526: {
5527:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5528:   Mat_SeqAIJ *p_oth;
5529:   IS          rows, map;
5530:   PetscHMapI  hamp;
5531:   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5532:   MPI_Comm    comm;
5533:   PetscSF     sf, osf;
5534:   PetscBool   has;

5536:   PetscObjectGetComm((PetscObject)A, &comm);
5537:   PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0);
5538:   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5539:    *  and then create a submatrix (that often is an overlapping matrix)
5540:    * */
5541:   if (reuse == MAT_INITIAL_MATRIX) {
5542:     /* Use a hash table to figure out unique keys */
5543:     PetscHMapICreate(&hamp);
5544:     PetscHMapIResize(hamp, a->B->cmap->n);
5545:     PetscCalloc1(a->B->cmap->n, &mapping);
5546:     count = 0;
5547:     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5548:     for (i = 0; i < a->B->cmap->n; i++) {
5549:       key = a->garray[i] / dof;
5550:       PetscHMapIHas(hamp, key, &has);
5551:       if (!has) {
5552:         mapping[i] = count;
5553:         PetscHMapISet(hamp, key, count++);
5554:       } else {
5555:         /* Current 'i' has the same value the previous step */
5556:         mapping[i] = count - 1;
5557:       }
5558:     }
5559:     ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map);
5560:     PetscHMapIGetSize(hamp, &htsize);
5562:     PetscCalloc1(htsize, &rowindices);
5563:     off = 0;
5564:     PetscHMapIGetKeys(hamp, &off, rowindices);
5565:     PetscHMapIDestroy(&hamp);
5566:     PetscSortInt(htsize, rowindices);
5567:     ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows);
5568:     /* In case the matrix was already created but the user wants to recreate it */
5569:     MatDestroy(P_oth);
5570:     MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth);
5571:     PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map);
5572:     ISDestroy(&map);
5573:     ISDestroy(&rows);
5574:   } else if (reuse == MAT_REUSE_MATRIX) {
5575:     /* If matrix was already created, we simply update values using SF objects
5576:      * that were attached to the matrix earlier.
5577:      */
5578:     const PetscScalar *pd_a, *po_a;

5580:     PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf);
5581:     PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf);
5583:     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5584:     /* Update values in place */
5585:     MatSeqAIJGetArrayRead(p->A, &pd_a);
5586:     MatSeqAIJGetArrayRead(p->B, &po_a);
5587:     PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE);
5588:     PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE);
5589:     PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE);
5590:     PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE);
5591:     MatSeqAIJRestoreArrayRead(p->A, &pd_a);
5592:     MatSeqAIJRestoreArrayRead(p->B, &po_a);
5593:   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5594:   PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0);
5595:   return 0;
5596: }
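/* Illustrative example (not part of the source): with dof = 2 and a sorted off-diagonal column map
   a->garray = {4, 5, 8, 9}, the hash-map pass above yields
     rows    = {2, 4}        (unique values of garray[i]/dof, i.e. the block rows of P to fetch)
     mapping = {0, 0, 1, 1}  (off-diagonal column i of A maps to local row mapping[i] of P_oth)
   so P_oth holds one row of P per unique block row referenced by A's off-diagonal part. */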

5598: /*@C
5599:   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of B whose indices equal the nonzero columns of local A

5601:   Collective

5603:   Input Parameters:
5604: + A - the first matrix in `MATMPIAIJ` format
5605: . B - the second matrix in `MATMPIAIJ` format
5606: - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

5608:   Output Parameters:
5609: + rowb - on input, the index set of rows of B to extract (or NULL); modified on output
5610: . colb - on input, the index set of columns of B to extract (or NULL); modified on output
5611: - B_seq - the sequential matrix generated

5613:   Level: developer

5615: @*/
5616: PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5617: {
5618:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5619:   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5620:   IS          isrowb, iscolb;
5621:   Mat        *bseq = NULL;

5623:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5624:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5625:   }
5626:   PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0);

5628:   if (scall == MAT_INITIAL_MATRIX) {
5629:     start = A->cmap->rstart;
5630:     cmap  = a->garray;
5631:     nzA   = a->A->cmap->n;
5632:     nzB   = a->B->cmap->n;
5633:     PetscMalloc1(nzA + nzB, &idx);
5634:     ncols = 0;
5635:     for (i = 0; i < nzB; i++) { /* row < local row index */
5636:       if (cmap[i] < start) idx[ncols++] = cmap[i];
5637:       else break;
5638:     }
5639:     imark = i;
5640:     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5641:     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5642:     ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb);
5643:     ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb);
5644:   } else {
5646:     isrowb = *rowb;
5647:     iscolb = *colb;
5648:     PetscMalloc1(1, &bseq);
5649:     bseq[0] = *B_seq;
5650:   }
5651:   MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq);
5652:   *B_seq = bseq[0];
5653:   PetscFree(bseq);
5654:   if (!rowb) {
5655:     ISDestroy(&isrowb);
5656:   } else {
5657:     *rowb = isrowb;
5658:   }
5659:   if (!colb) {
5660:     ISDestroy(&iscolb);
5661:   } else {
5662:     *colb = iscolb;
5663:   }
5664:   PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0);
5665:   return 0;
5666: }
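/* Usage sketch (illustrative only): callers typically build the index sets once and then reuse them,
   e.g. inside a matrix-matrix product where B's rows are needed repeatedly:

     IS  rowb = NULL, colb = NULL;
     Mat B_seq = NULL;
     MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq);
     // ... later, after B's values change but its nonzero pattern stays fixed ...
     MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq);
     ISDestroy(&rowb);
     ISDestroy(&colb);
     MatDestroy(&B_seq);
*/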

5668: /*
5669:     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero columns
5670:     of the OFF-DIAGONAL portion of local A

5672:     Collective

5674:    Input Parameters:
5675: +    A,B - the matrices in mpiaij format
5676: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

5678:    Output Parameters:
5679: +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5680: .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5681: .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5682: -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

5684:     Developer Note:
5685:     This directly accesses information inside the VecScatter associated with the matrix-vector product
5686:      for this matrix. This is not desirable.

5688:     Level: developer

5690: */
5691: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5692: {
5693:   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5694:   Mat_SeqAIJ        *b_oth;
5695:   VecScatter         ctx;
5696:   MPI_Comm           comm;
5697:   const PetscMPIInt *rprocs, *sprocs;
5698:   const PetscInt    *srow, *rstarts, *sstarts;
5699:   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5700:   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5701:   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5702:   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5703:   PetscMPIInt        size, tag, rank, nreqs;

5705:   PetscObjectGetComm((PetscObject)A, &comm);
5706:   MPI_Comm_size(comm, &size);

5708:   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5709:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5710:   }
5711:   PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0);
5712:   MPI_Comm_rank(comm, &rank);

5714:   if (size == 1) {
5715:     startsj_s = NULL;
5716:     bufa_ptr  = NULL;
5717:     *B_oth    = NULL;
5718:     return 0;
5719:   }

5721:   ctx = a->Mvctx;
5722:   tag = ((PetscObject)ctx)->tag;

5724:   VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs);
5725:   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5726:   VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs);
5727:   PetscMPIIntCast(nsends + nrecvs, &nreqs);
5728:   PetscMalloc1(nreqs, &reqs);
5729:   rwaits = reqs;
5730:   swaits = reqs + nrecvs;

5732:   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5733:   if (scall == MAT_INITIAL_MATRIX) {
5734:     /* i-array */
5735:     /*---------*/
5736:     /*  post receives */
5737:     if (nrecvs) PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues); /* rstarts can be NULL when nrecvs=0 */
5738:     for (i = 0; i < nrecvs; i++) {
5739:       rowlen = rvalues + rstarts[i] * rbs;
5740:       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5741:       MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i);
5742:     }

5744:     /* pack the outgoing message */
5745:     PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj);

5747:     sstartsj[0] = 0;
5748:     rstartsj[0] = 0;
5749:     len         = 0; /* total length of j or a array to be sent */
5750:     if (nsends) {
5751:       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5752:       PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues);
5753:     }
5754:     for (i = 0; i < nsends; i++) {
5755:       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5756:       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5757:       for (j = 0; j < nrows; j++) {
5758:         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5759:         for (l = 0; l < sbs; l++) {
5760:           MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL); /* rowlength */

5762:           rowlen[j * sbs + l] = ncols;

5764:           len += ncols;
5765:           MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL);
5766:         }
5767:         k++;
5768:       }
5769:       MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i);

5771:       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5772:     }
5773:     /* recvs and sends of i-array are completed */
5774:     if (nreqs) MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
5775:     PetscFree(svalues);

5777:     /* allocate buffers for sending j and a arrays */
5778:     PetscMalloc1(len + 1, &bufj);
5779:     PetscMalloc1(len + 1, &bufa);

5781:     /* create i-array of B_oth */
5782:     PetscMalloc1(aBn + 2, &b_othi);

5784:     b_othi[0] = 0;
5785:     len       = 0; /* total length of j or a array to be received */
5786:     k         = 0;
5787:     for (i = 0; i < nrecvs; i++) {
5788:       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5789:       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5790:       for (j = 0; j < nrows; j++) {
5791:         b_othi[k + 1] = b_othi[k] + rowlen[j];
5792:         PetscIntSumError(rowlen[j], len, &len);
5793:         k++;
5794:       }
5795:       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5796:     }
5797:     PetscFree(rvalues);

5799:     /* allocate space for j and a arrays of B_oth */
5800:     PetscMalloc1(b_othi[aBn] + 1, &b_othj);
5801:     PetscMalloc1(b_othi[aBn] + 1, &b_otha);

5803:     /* j-array */
5804:     /*---------*/
5805:     /*  post receives of j-array */
5806:     for (i = 0; i < nrecvs; i++) {
5807:       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5808:       MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i);
5809:     }

5811:     /* pack the outgoing message j-array */
5812:     if (nsends) k = sstarts[0];
5813:     for (i = 0; i < nsends; i++) {
5814:       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5815:       bufJ  = bufj + sstartsj[i];
5816:       for (j = 0; j < nrows; j++) {
5817:         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5818:         for (ll = 0; ll < sbs; ll++) {
5819:           MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL);
5820:           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5821:           MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL);
5822:         }
5823:       }
5824:       MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i);
5825:     }

5827:     /* recvs and sends of j-array are completed */
5828:     if (nreqs) MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
5829:   } else if (scall == MAT_REUSE_MATRIX) {
5830:     sstartsj = *startsj_s;
5831:     rstartsj = *startsj_r;
5832:     bufa     = *bufa_ptr;
5833:     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5834:     MatSeqAIJGetArrayWrite(*B_oth, &b_otha);
5835:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse type");

5837:   /* a-array */
5838:   /*---------*/
5839:   /*  post receives of a-array */
5840:   for (i = 0; i < nrecvs; i++) {
5841:     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5842:     MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i);
5843:   }

5845:   /* pack the outgoing message a-array */
5846:   if (nsends) k = sstarts[0];
5847:   for (i = 0; i < nsends; i++) {
5848:     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5849:     bufA  = bufa + sstartsj[i];
5850:     for (j = 0; j < nrows; j++) {
5851:       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5852:       for (ll = 0; ll < sbs; ll++) {
5853:         MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals);
5854:         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5855:         MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals);
5856:       }
5857:     }
5858:     MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i);
5859:   }
5860:   /* recvs and sends of a-array are completed */
5861:   if (nreqs) MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
5862:   PetscFree(reqs);

5864:   if (scall == MAT_INITIAL_MATRIX) {
5865:     /* put together the new matrix */
5866:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth);

5868:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5869:     /* Since these are PETSc arrays, change flags to free them as necessary. */
5870:     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5871:     b_oth->free_a  = PETSC_TRUE;
5872:     b_oth->free_ij = PETSC_TRUE;
5873:     b_oth->nonew   = 0;

5875:     PetscFree(bufj);
5876:     if (!startsj_s || !bufa_ptr) {
5877:       PetscFree2(sstartsj, rstartsj);
5878:       PetscFree(bufa_ptr);
5879:     } else {
5880:       *startsj_s = sstartsj;
5881:       *startsj_r = rstartsj;
5882:       *bufa_ptr  = bufa;
5883:     }
5884:   } else if (scall == MAT_REUSE_MATRIX) {
5885:     MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha);
5886:   }

5888:   VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs);
5889:   VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs);
5890:   PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0);
5891:   return 0;
5892: }
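/* Usage sketch (illustrative only): the buffers returned on the first call let later calls skip the
   symbolic (i/j) exchange and re-communicate only the numerical values:

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth = NULL;
     MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth);
     // ... B's values change, sparsity pattern unchanged ...
     MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth);
*/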

5894: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5895: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5896: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5897: #if defined(PETSC_HAVE_MKL_SPARSE)
5898: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5899: #endif
5900: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5901: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5902: #if defined(PETSC_HAVE_ELEMENTAL)
5903: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5904: #endif
5905: #if defined(PETSC_HAVE_SCALAPACK)
5906: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5907: #endif
5908: #if defined(PETSC_HAVE_HYPRE)
5909: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5910: #endif
5911: #if defined(PETSC_HAVE_CUDA)
5912: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5913: #endif
5914: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5915: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5916: #endif
5917: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5918: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5919: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

5921: /*
5922:     Computes C = (B'*A')' since computing A*B directly, with A dense and B sparse, is untenable

5924:                n                       p                          p
5925:         [             ]       [             ]         [                 ]
5926:       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5927:         [             ]       [             ]         [                 ]

5929: */
5930: static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
5931: {
5932:   Mat At, Bt, Ct;

5934:   MatTranspose(A, MAT_INITIAL_MATRIX, &At);
5935:   MatTranspose(B, MAT_INITIAL_MATRIX, &Bt);
5936:   MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct);
5937:   MatDestroy(&At);
5938:   MatDestroy(&Bt);
5939:   MatTransposeSetPrecursor(Ct, C);
5940:   MatTranspose(Ct, MAT_REUSE_MATRIX, &C);
5941:   MatDestroy(&Ct);
5942:   return 0;
5943: }
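/* Illustrative sketch of the user-level path that ends up in the routines above/below (A and B are assumed
   to be MPIDENSE and MPIAIJ respectively):

     Mat C;
     MatProductCreate(A, B, NULL, &C);
     MatProductSetType(C, MATPRODUCT_AB);
     MatProductSetFromOptions(C);
     MatProductSymbolic(C); // dispatches to MatMatMultSymbolic_MPIDense_MPIAIJ below
     MatProductNumeric(C);  // dispatches to MatMatMultNumeric_MPIDense_MPIAIJ above
*/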

5945: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
5946: {
5947:   PetscBool cisdense;

5950:   MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N);
5951:   MatSetBlockSizesFromMats(C, A, B);
5952:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "");
5953:   if (!cisdense) MatSetType(C, ((PetscObject)A)->type_name);
5954:   MatSetUp(C);

5956:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5957:   return 0;
5958: }

5960: /* ----------------------------------------------------------------*/
5961: static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5962: {
5963:   Mat_Product *product = C->product;
5964:   Mat          A = product->A, B = product->B;

5966:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5967:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

5969:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5970:   C->ops->productsymbolic = MatProductSymbolic_AB;
5971:   return 0;
5972: }

5974: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5975: {
5976:   Mat_Product *product = C->product;

5978:   if (product->type == MATPRODUCT_AB) MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);
5979:   return 0;
5980: }

5982: /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

5984:   Input Parameters:

5986:     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5987:     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

5989:     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

5991:     For Set1, j1[] contains column indices of the nonzeros.
5992:     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5993:     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5994:     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

5996:     Similar for Set2.

5998:     This routine merges the two sets of nonzeros row by row and removes repeats.

6000:   Output Parameters: (memory is allocated by the caller)

6002:     i[],j[]: the CSR of the merged matrix, which has m rows.
6003:     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6004:     imap2[]: similar to imap1[], but for Set2.
6005:     Note we order nonzeros row-by-row and from left to right.
6006: */
6007: static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6008: {
6009:   PetscInt   r, m; /* Row index of mat */
6010:   PetscCount t, t1, t2, b1, e1, b2, e2;

6012:   MatGetLocalSize(mat, &m, NULL);
6013:   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6014:   i[0]        = 0;
6015:   for (r = 0; r < m; r++) { /* Do row by row merging */
6016:     b1 = rowBegin1[r];
6017:     e1 = rowEnd1[r];
6018:     b2 = rowBegin2[r];
6019:     e2 = rowEnd2[r];
6020:     while (b1 < e1 && b2 < e2) {
6021:       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6022:         j[t]      = j1[b1];
6023:         imap1[t1] = t;
6024:         imap2[t2] = t;
6025:         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6026:         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6027:         t1++;
6028:         t2++;
6029:         t++;
6030:       } else if (j1[b1] < j2[b2]) {
6031:         j[t]      = j1[b1];
6032:         imap1[t1] = t;
6033:         b1 += jmap1[t1 + 1] - jmap1[t1];
6034:         t1++;
6035:         t++;
6036:       } else {
6037:         j[t]      = j2[b2];
6038:         imap2[t2] = t;
6039:         b2 += jmap2[t2 + 1] - jmap2[t2];
6040:         t2++;
6041:         t++;
6042:       }
6043:     }
6044:     /* Merge the remaining in either j1[] or j2[] */
6045:     while (b1 < e1) {
6046:       j[t]      = j1[b1];
6047:       imap1[t1] = t;
6048:       b1 += jmap1[t1 + 1] - jmap1[t1];
6049:       t1++;
6050:       t++;
6051:     }
6052:     while (b2 < e2) {
6053:       j[t]      = j2[b2];
6054:       imap2[t2] = t;
6055:       b2 += jmap2[t2 + 1] - jmap2[t2];
6056:       t2++;
6057:       t++;
6058:     }
6059:     i[r + 1] = t;
6060:   }
6061:   return 0;
6062: }
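/* Worked example (illustrative): for a single row with
     Set1: j1 = {1,1,4}, jmap1 = {0,2,3}   (unique columns 1 and 4, with 2 and 1 repeats)
     Set2: j2 = {2,4},   jmap2 = {0,1,2}   (unique columns 2 and 4, one occurrence each)
   the merged row is j = {1,2,4} with i = {0,3}, and
     imap1 = {0,2}   (Set1's unique nonzeros land at positions 0 and 2 of the merged row)
     imap2 = {1,2}   (Set2's unique nonzeros land at positions 1 and 2 of the merged row) */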

6064: /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

6066:   Input Parameters:
6067:     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6068:     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6069:       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

6071:       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6072:       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

6074:   Output Parameters:
6075:     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6076:     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6077:       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6078:       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

6080:     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6081:       Atot: number of entries belonging to the diagonal block.
6082:       Annz: number of unique nonzeros belonging to the diagonal block.
6083:       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6084:         repeats (i.e., same 'i,j' pair).
6085:       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6086:         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

6091:     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

6093:     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6094: */
6095: static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6096: {
6097:   PetscInt    cstart, cend, rstart, rend, row, col;
6098:   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6099:   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6100:   PetscCount  k, m, p, q, r, s, mid;
6101:   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

6103:   PetscLayoutGetRange(mat->rmap, &rstart, &rend);
6104:   PetscLayoutGetRange(mat->cmap, &cstart, &cend);
6105:   m = rend - rstart;

6107:   for (k = 0; k < n; k++) {
6108:     if (i[k] >= 0) break;
6109:   } /* Skip negative rows */

6111:   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6112:      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6113:   */
6114:   while (k < n) {
6115:     row = i[k];
6116:     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6117:     for (s = k; s < n; s++)
6118:       if (i[s] != row) break;
6119:     for (p = k; p < s; p++) {
6120:       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6121:       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6122:     }
6123:     PetscSortIntWithCountArray(s - k, j + k, perm + k);
6124:     PetscSortedIntUpperBound(j, k, s, -1, &mid); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6125:     rowBegin[row - rstart] = k;
6126:     rowMid[row - rstart]   = mid;
6127:     rowEnd[row - rstart]   = s;

6129:     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6130:     Atot += mid - k;
6131:     Btot += s - mid;

6133:     /* Count unique nonzeros of this diag/offdiag row */
6134:     for (p = k; p < mid;) {
6135:       col = j[p];
6136:       do {
6137:         j[p] += PETSC_MAX_INT;
6138:         p++;
6139:       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6140:       Annz++;
6141:     }

6143:     for (p = mid; p < s;) {
6144:       col = j[p];
6145:       do {
6146:         p++;
6147:       } while (p < s && j[p] == col);
6148:       Bnnz++;
6149:     }
6150:     k = s;
6151:   }

6153:   /* Allocation according to Atot, Btot, Annz, Bnnz */
6154:   PetscMalloc1(Atot, &Aperm);
6155:   PetscMalloc1(Btot, &Bperm);
6156:   PetscMalloc1(Annz + 1, &Ajmap);
6157:   PetscMalloc1(Bnnz + 1, &Bjmap);

6159:   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6160:   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6161:   for (r = 0; r < m; r++) {
6162:     k   = rowBegin[r];
6163:     mid = rowMid[r];
6164:     s   = rowEnd[r];
6165:     PetscArraycpy(Aperm + Atot, perm + k, mid - k);
6166:     PetscArraycpy(Bperm + Btot, perm + mid, s - mid);
6167:     Atot += mid - k;
6168:     Btot += s - mid;

6170:     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6171:     for (p = k; p < mid;) {
6172:       col = j[p];
6173:       q   = p;
6174:       do {
6175:         p++;
6176:       } while (p < mid && j[p] == col);
6177:       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6178:       Annz++;
6179:     }

6181:     for (p = mid; p < s;) {
6182:       col = j[p];
6183:       q   = p;
6184:       do {
6185:         p++;
6186:       } while (p < s && j[p] == col);
6187:       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6188:       Bnnz++;
6189:     }
6190:   }
6191:   /* Output */
6192:   *Aperm_ = Aperm;
6193:   *Annz_  = Annz;
6194:   *Atot_  = Atot;
6195:   *Ajmap_ = Ajmap;
6196:   *Bperm_ = Bperm;
6197:   *Bnnz_  = Bnnz;
6198:   *Btot_  = Btot;
6199:   *Bjmap_ = Bjmap;
6200:   return 0;
6201: }
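/* Worked example (illustrative): on a rank owning columns [cstart,cend) = [10,20), a local row whose
   entries have column indices {3, 12, 12, 25} is split as
     diagonal block    : {12,12} -> Atot += 2, Annz += 1, and Ajmap records 2 repeats for that nonzero
     off-diagonal block: {3,25}  -> Btot += 2, Bnnz += 2, and Bjmap records 1 repeat for each
   with rowBegin/rowMid/rowEnd delimiting the diag [begin,mid) and offdiag [mid,end) subranges of j[]. */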

6203: /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

6205:   Input Parameters:
6206:     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6207:     nnz:  number of unique nonzeros in the merged matrix
6208:     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6209:     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

6211:   Output Parameter: (memory is allocated by the caller)
6212:     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

6214:   Example:
6215:     nnz1 = 4
6216:     nnz  = 6
6217:     imap = [1,3,4,5]
6218:     jmap = [0,3,5,6,7]
6219:    then,
6220:     jmap_new = [0,0,3,3,5,6,7]
6221: */
6222: static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6223: {
6224:   PetscCount k, p;

6226:   jmap_new[0] = 0;
6227:   p           = nnz;                /* p loops over jmap_new[] backwards */
6228:   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6229:     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6230:   }
6231:   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6232:   return 0;
6233: }

6235: PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6236: {
6237:   MPI_Comm    comm;
6238:   PetscMPIInt rank, size;
6239:   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6240:   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6241:   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;

6243:   PetscFree(mpiaij->garray);
6244:   VecDestroy(&mpiaij->lvec);
6245: #if defined(PETSC_USE_CTABLE)
6246:   PetscTableDestroy(&mpiaij->colmap);
6247: #else
6248:   PetscFree(mpiaij->colmap);
6249: #endif
6250:   VecScatterDestroy(&mpiaij->Mvctx);
6251:   mat->assembled     = PETSC_FALSE;
6252:   mat->was_assembled = PETSC_FALSE;
6253:   MatResetPreallocationCOO_MPIAIJ(mat);

6255:   PetscObjectGetComm((PetscObject)mat, &comm);
6256:   MPI_Comm_size(comm, &size);
6257:   MPI_Comm_rank(comm, &rank);
6258:   PetscLayoutSetUp(mat->rmap);
6259:   PetscLayoutSetUp(mat->cmap);
6260:   PetscLayoutGetRange(mat->rmap, &rstart, &rend);
6261:   PetscLayoutGetRange(mat->cmap, &cstart, &cend);
6262:   MatGetLocalSize(mat, &m, &n);
6263:   MatGetSize(mat, &M, &N);

6265:   /* ---------------------------------------------------------------------------*/
6266:   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6267:   /* entries come first, then local rows, then remote rows.                     */
6268:   /* ---------------------------------------------------------------------------*/
6269:   PetscCount n1 = coo_n, *perm1;
6270:   PetscInt  *i1 = coo_i, *j1 = coo_j;

6272:   PetscMalloc1(n1, &perm1);
6273:   for (k = 0; k < n1; k++) perm1[k] = k;

6275:   /* Manipulate indices so that entries with negative row or col indices will have smallest
6276:      row indices, local entries will have greater but negative row indices, and remote entries
6277:      will have positive row indices.
6278:   */
6279:   for (k = 0; k < n1; k++) {
6280:     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6281:     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6282:     else {
6284:       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6285:     }
6286:   }

6288:   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6289:   PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1);
6290:   for (k = 0; k < n1; k++) {
6291:     if (i1[k] > PETSC_MIN_INT) break;
6292:   }                                                                               /* Advance k to the first entry we need to take care of */
6293:   PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem); /* rem is upper bound of the last local row */
6294:   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/

6296:   /* ---------------------------------------------------------------------------*/
6297:   /*           Split local rows into diag/offdiag portions                      */
6298:   /* ---------------------------------------------------------------------------*/
6299:   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6300:   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6301:   PetscCount  Annz1, Bnnz1, Atot1, Btot1;

6303:   PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1);
6304:   PetscMalloc1(n1 - rem, &Cperm1);
6305:   MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1);

6307:   /* ---------------------------------------------------------------------------*/
6308:   /*           Send remote rows to their owner                                  */
6309:   /* ---------------------------------------------------------------------------*/
6310:   /* Find which rows should be sent to which remote ranks*/
6311:   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6312:   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6313:   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6314:   const PetscInt *ranges;
6315:   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */

6317:   PetscLayoutGetRanges(mat->rmap, &ranges);
6318:   PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries);
6319:   for (k = rem; k < n1;) {
6320:     PetscMPIInt owner;
6321:     PetscInt    firstRow, lastRow;

6323:     /* Locate a row range */
6324:     firstRow = i1[k]; /* first row of this owner */
6325:     PetscLayoutFindOwner(mat->rmap, firstRow, &owner);
6326:     lastRow = ranges[owner + 1] - 1; /* last row of this owner */

6328:     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6329:     PetscSortedIntUpperBound(i1, k, n1, lastRow, &p);

6331:     /* All entries in [k,p) belong to this remote owner */
6332:     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6333:       PetscMPIInt *sendto2;
6334:       PetscInt    *nentries2;
6335:       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;

6337:       PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2);
6338:       PetscArraycpy(sendto2, sendto, maxNsend);
6339:       PetscArraycpy(nentries2, nentries, maxNsend); /* copy the old counts, not the (uninitialized) new array onto itself */
6340:       PetscFree2(sendto, nentries);                 /* free the old arrays, not the ones just allocated */
6341:       sendto   = sendto2;
6342:       nentries = nentries2;
6343:       maxNsend = maxNsend2;
6344:     }
6345:     sendto[nsend]   = owner;
6346:     nentries[nsend] = p - k;
6347:     PetscCountCast(p - k, &nentries[nsend]);
6348:     nsend++;
6349:     k = p;
6350:   }

6352:   /* Build 1st SF to know offsets on remote to send data */
6353:   PetscSF      sf1;
6354:   PetscInt     nroots = 1, nroots2 = 0;
6355:   PetscInt     nleaves = nsend, nleaves2 = 0;
6356:   PetscInt    *offsets;
6357:   PetscSFNode *iremote;

6359:   PetscSFCreate(comm, &sf1);
6360:   PetscMalloc1(nsend, &iremote);
6361:   PetscMalloc1(nsend, &offsets);
6362:   for (k = 0; k < nsend; k++) {
6363:     iremote[k].rank  = sendto[k];
6364:     iremote[k].index = 0;
6365:     nleaves2 += nentries[k];
6367:   }
6368:   PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
6369:   PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM);
6370:   PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM); /* Would nroots2 overflow, we check offsets[] below */
6371:   PetscSFDestroy(&sf1);
6372:   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);

6374:   /* Build 2nd SF to send remote COOs to their owner */
6375:   PetscSF sf2;
6376:   nroots  = nroots2;
6377:   nleaves = nleaves2;
6378:   PetscSFCreate(comm, &sf2);
6379:   PetscSFSetFromOptions(sf2);
6380:   PetscMalloc1(nleaves, &iremote);
6381:   p = 0;
6382:   for (k = 0; k < nsend; k++) {
6384:     for (q = 0; q < nentries[k]; q++, p++) {
6385:       iremote[p].rank  = sendto[k];
6386:       iremote[p].index = offsets[k] + q;
6387:     }
6388:   }
6389:   PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);

6391:   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6392:   PetscArraycpy(Cperm1, perm1 + rem, n1 - rem);

6394:   /* Send the remote COOs to their owner */
6395:   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6396:   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6397:   PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2);
6398:   PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE);
6399:   PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE);
6400:   PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE);
6401:   PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE);

6403:   PetscFree(offsets);
6404:   PetscFree2(sendto, nentries);

6406:   /* ---------------------------------------------------------------*/
6407:   /* Sort received COOs by row along with the permutation array     */
6408:   /* ---------------------------------------------------------------*/
6409:   for (k = 0; k < n2; k++) perm2[k] = k;
6410:   PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2);

6412:   /* ---------------------------------------------------------------*/
6413:   /* Split received COOs into diag/offdiag portions                 */
6414:   /* ---------------------------------------------------------------*/
6415:   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6416:   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6417:   PetscCount  Annz2, Bnnz2, Atot2, Btot2;

6419:   PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2);
6420:   MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2);

6422:   /* --------------------------------------------------------------------------*/
6423:   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6424:   /* --------------------------------------------------------------------------*/
6425:   PetscInt *Ai, *Bi;
6426:   PetscInt *Aj, *Bj;

6428:   PetscMalloc1(m + 1, &Ai);
6429:   PetscMalloc1(m + 1, &Bi);
6430:   PetscMalloc1(Annz1 + Annz2, &Aj); /* Since local and remote entries might have dups, we might allocate excess memory */
6431:   PetscMalloc1(Bnnz1 + Bnnz2, &Bj);

6433:   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6434:   PetscMalloc1(Annz1, &Aimap1);
6435:   PetscMalloc1(Bnnz1, &Bimap1);
6436:   PetscMalloc1(Annz2, &Aimap2);
6437:   PetscMalloc1(Bnnz2, &Bimap2);

6439:   MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj);
6440:   MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj);

6442:   /* --------------------------------------------------------------------------*/
6443:   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6444:   /* expect that most nonzeros in A/B have local contributing entries          */
6445:   /* --------------------------------------------------------------------------*/
6446:   PetscInt    Annz = Ai[m];
6447:   PetscInt    Bnnz = Bi[m];
6448:   PetscCount *Ajmap1_new, *Bjmap1_new;

6450:   PetscMalloc1(Annz + 1, &Ajmap1_new);
6451:   PetscMalloc1(Bnnz + 1, &Bjmap1_new);

6453:   ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new);
6454:   ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new);

6456:   PetscFree(Aimap1);
6457:   PetscFree(Ajmap1);
6458:   PetscFree(Bimap1);
6459:   PetscFree(Bjmap1);
6460:   PetscFree3(rowBegin1, rowMid1, rowEnd1);
6461:   PetscFree3(rowBegin2, rowMid2, rowEnd2);
6462:   PetscFree(perm1);
6463:   PetscFree3(i2, j2, perm2);

6465:   Ajmap1 = Ajmap1_new;
6466:   Bjmap1 = Bjmap1_new;

6468:   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6469:   if (Annz < Annz1 + Annz2) {
6470:     PetscInt *Aj_new;
6471:     PetscMalloc1(Annz, &Aj_new);
6472:     PetscArraycpy(Aj_new, Aj, Annz);
6473:     PetscFree(Aj);
6474:     Aj = Aj_new;
6475:   }

6477:   if (Bnnz < Bnnz1 + Bnnz2) {
6478:     PetscInt *Bj_new;
6479:     PetscMalloc1(Bnnz, &Bj_new);
6480:     PetscArraycpy(Bj_new, Bj, Bnnz);
6481:     PetscFree(Bj);
6482:     Bj = Bj_new;
6483:   }

6485:   /* --------------------------------------------------------------------------------*/
6486:   /* Create new submatrices for on-process and off-process coupling                  */
6487:   /* --------------------------------------------------------------------------------*/
6488:   PetscScalar *Aa, *Ba;
6489:   MatType      rtype;
6490:   Mat_SeqAIJ  *a, *b;
6491:   PetscCalloc1(Annz, &Aa); /* Zero matrix on device */
6492:   PetscCalloc1(Bnnz, &Ba);
6493:   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6494:   if (cstart) {
6495:     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6496:   }
6497:   MatDestroy(&mpiaij->A);
6498:   MatDestroy(&mpiaij->B);
6499:   MatGetRootType_Private(mat, &rtype);
6500:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A);
6501:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B);
6502:   MatSetUpMultiply_MPIAIJ(mat);

6504:   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6505:   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6506:   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6507:   a->free_a = b->free_a = PETSC_TRUE;
6508:   a->free_ij = b->free_ij = PETSC_TRUE;

6510:   /* conversion must happen AFTER multiply setup */
6511:   MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A);
6512:   MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B);
6513:   VecDestroy(&mpiaij->lvec);
6514:   MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL);

6516:   mpiaij->coo_n   = coo_n;
6517:   mpiaij->coo_sf  = sf2;
6518:   mpiaij->sendlen = nleaves;
6519:   mpiaij->recvlen = nroots;

6521:   mpiaij->Annz = Annz;
6522:   mpiaij->Bnnz = Bnnz;

6524:   mpiaij->Annz2 = Annz2;
6525:   mpiaij->Bnnz2 = Bnnz2;

6527:   mpiaij->Atot1 = Atot1;
6528:   mpiaij->Atot2 = Atot2;
6529:   mpiaij->Btot1 = Btot1;
6530:   mpiaij->Btot2 = Btot2;

6532:   mpiaij->Ajmap1 = Ajmap1;
6533:   mpiaij->Aperm1 = Aperm1;

6535:   mpiaij->Bjmap1 = Bjmap1;
6536:   mpiaij->Bperm1 = Bperm1;

6538:   mpiaij->Aimap2 = Aimap2;
6539:   mpiaij->Ajmap2 = Ajmap2;
6540:   mpiaij->Aperm2 = Aperm2;

6542:   mpiaij->Bimap2 = Bimap2;
6543:   mpiaij->Bjmap2 = Bjmap2;
6544:   mpiaij->Bperm2 = Bperm2;

6546:   mpiaij->Cperm1 = Cperm1;

6548:   /* Allocate in preallocation. If not used, it has zero cost on host */
6549:   PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf);
6550:   return 0;
6551: }

6553: static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6554: {
6555:   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6556:   Mat               A = mpiaij->A, B = mpiaij->B;
6557:   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6558:   PetscScalar      *Aa, *Ba;
6559:   PetscScalar      *sendbuf = mpiaij->sendbuf;
6560:   PetscScalar      *recvbuf = mpiaij->recvbuf;
6561:   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6562:   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6563:   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6564:   const PetscCount *Cperm1 = mpiaij->Cperm1;

6566:   MatSeqAIJGetArray(A, &Aa); /* Might read and write matrix values */
6567:   MatSeqAIJGetArray(B, &Ba);

6569:   /* Pack entries to be sent to remote */
6570:   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

6572:   /* Send remote entries to their owner and overlap the communication with local computation */
6573:   PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE);
6574:   /* Add local entries to A and B */
6575:   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6576:     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6577:     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6578:     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6579:   }
6580:   for (PetscCount i = 0; i < Bnnz; i++) {
6581:     PetscScalar sum = 0.0;
6582:     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6583:     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6584:   }
6585:   PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE);

6587:   /* Add received remote entries to A and B */
6588:   for (PetscCount i = 0; i < Annz2; i++) {
6589:     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6590:   }
6591:   for (PetscCount i = 0; i < Bnnz2; i++) {
6592:     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6593:   }
6594:   MatSeqAIJRestoreArray(A, &Aa);
6595:   MatSeqAIJRestoreArray(B, &Ba);
6596:   return 0;
6597: }
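/* Usage sketch of the COO assembly path implemented above (illustrative; the indices and values are made up):

     PetscInt    coo_i[] = {0, 0, 1}; // global row indices; repeated (i,j) pairs are summed
     PetscInt    coo_j[] = {0, 2, 1}; // global column indices; entries may also target off-process rows
     PetscScalar coo_v[] = {1.0, 2.0, 3.0};

     MatSetPreallocationCOO(A, 3, coo_i, coo_j); // for MATMPIAIJ this calls MatSetPreallocationCOO_MPIAIJ()
     MatSetValuesCOO(A, coo_v, INSERT_VALUES);   // for MATMPIAIJ this calls MatSetValuesCOO_MPIAIJ()
*/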

6599: /* ----------------------------------------------------------------*/

6601: /*MC
6602:    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

6604:    Options Database Keys:
6605: . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

6607:    Level: beginner

6609:    Notes:
6610:     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values;
6611:     in this case the values associated with the rows and columns one passes in are set to zero
6612:     in the matrix.

6614:     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6615:     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.

6617: .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6618: M*/
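/* Usage sketch (illustrative): selecting and preallocating this matrix type explicitly

     Mat A;
     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
     MatSetType(A, MATMPIAIJ);                        // or use -mat_type mpiaij with MatSetFromOptions(A)
     MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);  // 5 nonzeros/row in the diagonal block, 2 in the off-diagonal block
*/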

6620: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6621: {
6622:   Mat_MPIAIJ *b;
6623:   PetscMPIInt size;

6625:   MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);

6627:   PetscNew(&b);
6628:   B->data = (void *)b;
6629:   PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
6630:   B->assembled  = PETSC_FALSE;
6631:   B->insertmode = NOT_SET_VALUES;
6632:   b->size       = size;

6634:   MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank);

6636:   /* build cache for off array entries formed */
6637:   MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash);

6639:   b->donotstash  = PETSC_FALSE;
6640:   b->colmap      = NULL;
6641:   b->garray      = NULL;
6642:   b->roworiented = PETSC_TRUE;

6644:   /* stuff used for matrix vector multiply */
6645:   b->lvec  = NULL;
6646:   b->Mvctx = NULL;

6648:   /* stuff for MatGetRow() */
6649:   b->rowindices   = NULL;
6650:   b->rowvalues    = NULL;
6651:   b->getrowactive = PETSC_FALSE;

6653:   /* flexible pointer used in CUSPARSE classes */
6654:   b->spptr = NULL;

6656:   PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);
6657:   PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ);
6658:   PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ);
6659:   PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ);
6660:   PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ);
6661:   PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ);
6662:   PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ);
6663:   PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ);
6664:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM);
6665:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL);
6666: #if defined(PETSC_HAVE_CUDA)
6667:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE);
6668: #endif
6669: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6670:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos);
6671: #endif
6672: #if defined(PETSC_HAVE_MKL_SPARSE)
6673:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL);
6674: #endif
6675:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL);
6676:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ);
6677:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ);
6678:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense);
6679: #if defined(PETSC_HAVE_ELEMENTAL)
6680:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental);
6681: #endif
6682: #if defined(PETSC_HAVE_SCALAPACK)
6683:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK);
6684: #endif
6685:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS);
6686:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL);
6687: #if defined(PETSC_HAVE_HYPRE)
6688:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE);
6689:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ);
6690: #endif
6691:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ);
6692:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ);
6693:   PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ);
6694:   PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ);
6695:   PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ);
6696:   return 0;
6697: }

6699: /*@C
6700:      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6701:          and "off-diagonal" part of the matrix in CSR format.

6703:    Collective

6705:    Input Parameters:
6706: +  comm - MPI communicator
6707: .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6708: .  n - This value should be the same as the local size used in creating the
6709:        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have
6710:        it calculated if N is given). For square matrices n is almost always m.
6711: .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
6712: .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
6713: .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6714: .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6715: .   a - matrix values
6716: .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6717: .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6718: -   oa - matrix values

6720:    Output Parameter:
6721: .   mat - the matrix

6723:    Level: advanced

6725:    Notes:
6726:        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6727:        must free the arrays once the matrix has been destroyed and not before.

6729:        The i and j indices are 0 based

6731:        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

6733:        This sets local rows and cannot be used to set off-processor values.

6735:        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6736:        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6737:        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6738:        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6739:        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6740:        communication if it is known that only local entries will be set.

6742: .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6743:           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6744: @*/
6745: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6746: {
6747:   Mat_MPIAIJ *maij;

6752:   MatCreate(comm, mat);
6753:   MatSetSizes(*mat, m, n, M, N);
6754:   MatSetType(*mat, MATMPIAIJ);
6755:   maij = (Mat_MPIAIJ *)(*mat)->data;

6757:   (*mat)->preallocated = PETSC_TRUE;

6759:   PetscLayoutSetUp((*mat)->rmap);
6760:   PetscLayoutSetUp((*mat)->cmap);

6762:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A);
6763:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B);

6765:   MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
6766:   MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY);
6767:   MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY);
6768:   MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE);
6769:   MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
6770:   return 0;
6771: }
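/* Usage sketch (illustrative, assuming a run on exactly 2 MPI processes): each rank owns one row and one
   column; the diagonal block holds 2.0 and the off-diagonal block holds -1.0 coupling to the other rank.

     PetscMPIInt rank;
     PetscInt    i[]  = {0, 1}, j[]  = {0};  // diagonal block CSR: one entry, local column 0
     PetscInt    oi[] = {0, 1}, oj[1];       // off-diagonal block CSR: one entry, global column owned by the other rank
     PetscScalar a[]  = {2.0},  oa[] = {-1.0};
     Mat         A;
     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     oj[0] = 1 - rank;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A);
     // ... use A; the arrays above must remain valid until after MatDestroy(&A) ...
*/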

6773: typedef struct {
6774:   Mat       *mp;    /* intermediate products */
6775:   PetscBool *mptmp; /* is the intermediate product temporary ? */
6776:   PetscInt   cp;    /* number of intermediate products */

6778:   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6779:   PetscInt    *startsj_s, *startsj_r;
6780:   PetscScalar *bufa;
6781:   Mat          P_oth;

6783:   /* may take advantage of merging product->B */
6784:   Mat Bloc; /* B-local by merging diag and off-diag */

6786:   /* cusparse does not support splitting between the symbolic and numeric phases.
6787:      When api_user is true, we don't need to update the numerical values
6788:      of the temporary storage */
6789:   PetscBool reusesym;

6791:   /* support for COO values insertion */
6792:   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6793:   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6794:   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6795:   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6796:   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6797:   PetscMemType mtype;

6799:   /* customization */
6800:   PetscBool abmerge;
6801:   PetscBool P_oth_bind;
6802: } MatMatMPIAIJBACKEND;

6804: PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6805: {
6806:   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6807:   PetscInt             i;

6809:   PetscFree2(mmdata->startsj_s, mmdata->startsj_r);
6810:   PetscFree(mmdata->bufa);
6811:   PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v);
6812:   PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w);
6813:   MatDestroy(&mmdata->P_oth);
6814:   MatDestroy(&mmdata->Bloc);
6815:   PetscSFDestroy(&mmdata->sf);
6816:   for (i = 0; i < mmdata->cp; i++) MatDestroy(&mmdata->mp[i]);
6817:   PetscFree2(mmdata->mp, mmdata->mptmp);
6818:   PetscFree(mmdata->own[0]);
6819:   PetscFree(mmdata->own);
6820:   PetscFree(mmdata->off[0]);
6821:   PetscFree(mmdata->off);
6822:   PetscFree(mmdata);
6823:   return 0;
6824: }

6826: /* Copy the n selected entries of A whose indices are given in idx[] to v[].
6827:    If idx is NULL, copy the whole value array of A to v[].
6828:  */
6829: static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6830: {
6831:   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

6833:   PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f);
6834:   if (f) {
6835:     (*f)(A, n, idx, v);
6836:   } else {
6837:     const PetscScalar *vv;

6839:     MatSeqAIJGetArrayRead(A, &vv);
6840:     if (n && idx) {
6841:       PetscScalar    *w  = v;
6842:       const PetscInt *oi = idx;
6843:       PetscInt        j;

6845:       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6846:     } else {
6847:       PetscArraycpy(v, vv, n);
6848:     }
6849:     MatSeqAIJRestoreArrayRead(A, &vv);
6850:   }
6851:   return 0;
6852: }
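
/*
   Worked example (illustrative): with n = 3 and idx[] = {4, 0, 7}, the fallback path above produces
   v[] = {vv[4], vv[0], vv[7]}, where vv is the value array of A; with idx == NULL it simply copies the
   first n stored values of A, so callers pass n equal to the number of stored nonzeros in that case.
*/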

6854: static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6855: {
6856:   MatMatMPIAIJBACKEND *mmdata;
6857:   PetscInt             i, n_d, n_o;

6859:   MatCheckProduct(C, 1);
6861:   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
6862:   if (!mmdata->reusesym) { /* update temporary matrices */
6863:     if (mmdata->P_oth) MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth);
6864:     if (mmdata->Bloc) MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc);
6865:   }
6866:   mmdata->reusesym = PETSC_FALSE;

6868:   for (i = 0; i < mmdata->cp; i++) {
6870:     (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);
6871:   }
6872:   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6873:     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

6875:     if (mmdata->mptmp[i]) continue;
6876:     if (noff) {
6877:       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

6879:       MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o);
6880:       MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d);
6881:       n_o += noff;
6882:       n_d += nown;
6883:     } else {
6884:       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

6886:       MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d);
6887:       n_d += mm->nz;
6888:     }
6889:   }
6890:   if (mmdata->hasoffproc) { /* offprocess insertion */
6891:     PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d);
6892:     PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d);
6893:   }
6894:   MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES);
6895:   return 0;
6896: }

6898: /* Support for Pt * A, A * P, or Pt * A * P */
6899: #define MAX_NUMBER_INTERMEDIATE 4
6900: PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6901: {
6902:   Mat_Product           *product = C->product;
6903:   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6904:   Mat_MPIAIJ            *a, *p;
6905:   MatMatMPIAIJBACKEND   *mmdata;
6906:   ISLocalToGlobalMapping P_oth_l2g = NULL;
6907:   IS                     glob      = NULL;
6908:   const char            *prefix;
6909:   char                   pprefix[256];
6910:   const PetscInt        *globidx, *P_oth_idx;
6911:   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
6912:   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
6913:   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6914:                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
6915:                                                                                          /* a base offset; type-2: sparse with a local to global map table */
6916:   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

6918:   MatProductType ptype;
6919:   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
6920:   PetscMPIInt    size;

6922:   MatCheckProduct(C, 1);
6924:   ptype = product->type;
6925:   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
6926:     ptype                                          = MATPRODUCT_AB;
6927:     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6928:   }
6929:   switch (ptype) {
6930:   case MATPRODUCT_AB:
6931:     A          = product->A;
6932:     P          = product->B;
6933:     m          = A->rmap->n;
6934:     n          = P->cmap->n;
6935:     M          = A->rmap->N;
6936:     N          = P->cmap->N;
6937:     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6938:     break;
6939:   case MATPRODUCT_AtB:
6940:     P          = product->A;
6941:     A          = product->B;
6942:     m          = P->cmap->n;
6943:     n          = A->cmap->n;
6944:     M          = P->cmap->N;
6945:     N          = A->cmap->N;
6946:     hasoffproc = PETSC_TRUE;
6947:     break;
6948:   case MATPRODUCT_PtAP:
6949:     A          = product->A;
6950:     P          = product->B;
6951:     m          = P->cmap->n;
6952:     n          = P->cmap->n;
6953:     M          = P->cmap->N;
6954:     N          = P->cmap->N;
6955:     hasoffproc = PETSC_TRUE;
6956:     break;
6957:   default:
6958:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
6959:   }
6960:   MPI_Comm_size(PetscObjectComm((PetscObject)C), &size);
6961:   if (size == 1) hasoffproc = PETSC_FALSE;

6963:   /* defaults */
6964:   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
6965:     mp[i]    = NULL;
6966:     mptmp[i] = PETSC_FALSE;
6967:     rmapt[i] = -1;
6968:     cmapt[i] = -1;
6969:     rmapa[i] = NULL;
6970:     cmapa[i] = NULL;
6971:   }

6973:   /* customization */
6974:   PetscNew(&mmdata);
6975:   mmdata->reusesym = product->api_user;
6976:   if (ptype == MATPRODUCT_AB) {
6977:     if (product->api_user) {
6978:       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
6979:       PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL);
6980:       PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL);
6981:       PetscOptionsEnd();
6982:     } else {
6983:       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
6984:       PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL);
6985:       PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL);
6986:       PetscOptionsEnd();
6987:     }
6988:   } else if (ptype == MATPRODUCT_PtAP) {
6989:     if (product->api_user) {
6990:       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
6991:       PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL);
6992:       PetscOptionsEnd();
6993:     } else {
6994:       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
6995:       PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL);
6996:       PetscOptionsEnd();
6997:     }
6998:   }
6999:   a = (Mat_MPIAIJ *)A->data;
7000:   p = (Mat_MPIAIJ *)P->data;
7001:   MatSetSizes(C, m, n, M, N);
7002:   PetscLayoutSetUp(C->rmap);
7003:   PetscLayoutSetUp(C->cmap);
7004:   MatSetType(C, ((PetscObject)A)->type_name);
7005:   MatGetOptionsPrefix(C, &prefix);

7007:   cp = 0;
7008:   switch (ptype) {
7009:   case MATPRODUCT_AB: /* A * P */
7010:     MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth);

7012:     /* A_diag * P_local (merged or not) */
7013:     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7014:       /* P is product->B */
7015:       MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc);
7016:       MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]);
7017:       MatProductSetType(mp[cp], MATPRODUCT_AB);
7018:       MatProductSetFill(mp[cp], product->fill);
7019:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7020:       MatSetOptionsPrefix(mp[cp], prefix);
7021:       MatAppendOptionsPrefix(mp[cp], pprefix);
7022:       mp[cp]->product->api_user = product->api_user;
7023:       MatProductSetFromOptions(mp[cp]);
7024:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7025:       ISGetIndices(glob, &globidx);
7026:       rmapt[cp] = 1;
7027:       cmapt[cp] = 2;
7028:       cmapa[cp] = globidx;
7029:       mptmp[cp] = PETSC_FALSE;
7030:       cp++;
7031:     } else { /* A_diag * P_diag and A_diag * P_off */
7032:       MatProductCreate(a->A, p->A, NULL, &mp[cp]);
7033:       MatProductSetType(mp[cp], MATPRODUCT_AB);
7034:       MatProductSetFill(mp[cp], product->fill);
7035:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7036:       MatSetOptionsPrefix(mp[cp], prefix);
7037:       MatAppendOptionsPrefix(mp[cp], pprefix);
7038:       mp[cp]->product->api_user = product->api_user;
7039:       MatProductSetFromOptions(mp[cp]);
7040:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7041:       rmapt[cp] = 1;
7042:       cmapt[cp] = 1;
7043:       mptmp[cp] = PETSC_FALSE;
7044:       cp++;
7045:       MatProductCreate(a->A, p->B, NULL, &mp[cp]);
7046:       MatProductSetType(mp[cp], MATPRODUCT_AB);
7047:       MatProductSetFill(mp[cp], product->fill);
7048:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7049:       MatSetOptionsPrefix(mp[cp], prefix);
7050:       MatAppendOptionsPrefix(mp[cp], pprefix);
7051:       mp[cp]->product->api_user = product->api_user;
7052:       MatProductSetFromOptions(mp[cp]);
7053:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7054:       rmapt[cp] = 1;
7055:       cmapt[cp] = 2;
7056:       cmapa[cp] = p->garray;
7057:       mptmp[cp] = PETSC_FALSE;
7058:       cp++;
7059:     }

7061:     /* A_off * P_other */
7062:     if (mmdata->P_oth) {
7063:       MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g); /* make P_oth use local col ids */
7064:       ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx);
7065:       MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name);
7066:       MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind);
7067:       MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]);
7068:       MatProductSetType(mp[cp], MATPRODUCT_AB);
7069:       MatProductSetFill(mp[cp], product->fill);
7070:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7071:       MatSetOptionsPrefix(mp[cp], prefix);
7072:       MatAppendOptionsPrefix(mp[cp], pprefix);
7073:       mp[cp]->product->api_user = product->api_user;
7074:       MatProductSetFromOptions(mp[cp]);
7075:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7076:       rmapt[cp] = 1;
7077:       cmapt[cp] = 2;
7078:       cmapa[cp] = P_oth_idx;
7079:       mptmp[cp] = PETSC_FALSE;
7080:       cp++;
7081:     }
7082:     break;

7084:   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
7085:     /* A is product->B */
7086:     MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc);
7087:     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7088:       MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]);
7089:       MatProductSetType(mp[cp], MATPRODUCT_AtB);
7090:       MatProductSetFill(mp[cp], product->fill);
7091:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7092:       MatSetOptionsPrefix(mp[cp], prefix);
7093:       MatAppendOptionsPrefix(mp[cp], pprefix);
7094:       mp[cp]->product->api_user = product->api_user;
7095:       MatProductSetFromOptions(mp[cp]);
7096:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7097:       ISGetIndices(glob, &globidx);
7098:       rmapt[cp] = 2;
7099:       rmapa[cp] = globidx;
7100:       cmapt[cp] = 2;
7101:       cmapa[cp] = globidx;
7102:       mptmp[cp] = PETSC_FALSE;
7103:       cp++;
7104:     } else {
7105:       MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]);
7106:       MatProductSetType(mp[cp], MATPRODUCT_AtB);
7107:       MatProductSetFill(mp[cp], product->fill);
7108:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7109:       MatSetOptionsPrefix(mp[cp], prefix);
7110:       MatAppendOptionsPrefix(mp[cp], pprefix);
7111:       mp[cp]->product->api_user = product->api_user;
7112:       MatProductSetFromOptions(mp[cp]);
7113:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7114:       ISGetIndices(glob, &globidx);
7115:       rmapt[cp] = 1;
7116:       cmapt[cp] = 2;
7117:       cmapa[cp] = globidx;
7118:       mptmp[cp] = PETSC_FALSE;
7119:       cp++;
7120:       MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]);
7121:       MatProductSetType(mp[cp], MATPRODUCT_AtB);
7122:       MatProductSetFill(mp[cp], product->fill);
7123:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7124:       MatSetOptionsPrefix(mp[cp], prefix);
7125:       MatAppendOptionsPrefix(mp[cp], pprefix);
7126:       mp[cp]->product->api_user = product->api_user;
7127:       MatProductSetFromOptions(mp[cp]);
7128:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7129:       rmapt[cp] = 2;
7130:       rmapa[cp] = p->garray;
7131:       cmapt[cp] = 2;
7132:       cmapa[cp] = globidx;
7133:       mptmp[cp] = PETSC_FALSE;
7134:       cp++;
7135:     }
7136:     break;
7137:   case MATPRODUCT_PtAP:
7138:     MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth);
7139:     /* P is product->B */
7140:     MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc);
7141:     MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]);
7142:     MatProductSetType(mp[cp], MATPRODUCT_PtAP);
7143:     MatProductSetFill(mp[cp], product->fill);
7144:     PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7145:     MatSetOptionsPrefix(mp[cp], prefix);
7146:     MatAppendOptionsPrefix(mp[cp], pprefix);
7147:     mp[cp]->product->api_user = product->api_user;
7148:     MatProductSetFromOptions(mp[cp]);
7149:     (*mp[cp]->ops->productsymbolic)(mp[cp]);
7150:     ISGetIndices(glob, &globidx);
7151:     rmapt[cp] = 2;
7152:     rmapa[cp] = globidx;
7153:     cmapt[cp] = 2;
7154:     cmapa[cp] = globidx;
7155:     mptmp[cp] = PETSC_FALSE;
7156:     cp++;
7157:     if (mmdata->P_oth) {
7158:       MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g);
7159:       ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx);
7160:       MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name);
7161:       MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind);
7162:       MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]);
7163:       MatProductSetType(mp[cp], MATPRODUCT_AB);
7164:       MatProductSetFill(mp[cp], product->fill);
7165:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7166:       MatSetOptionsPrefix(mp[cp], prefix);
7167:       MatAppendOptionsPrefix(mp[cp], pprefix);
7168:       mp[cp]->product->api_user = product->api_user;
7169:       MatProductSetFromOptions(mp[cp]);
7170:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7171:       mptmp[cp] = PETSC_TRUE;
7172:       cp++;
7173:       MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]);
7174:       MatProductSetType(mp[cp], MATPRODUCT_AtB);
7175:       MatProductSetFill(mp[cp], product->fill);
7176:       PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp);
7177:       MatSetOptionsPrefix(mp[cp], prefix);
7178:       MatAppendOptionsPrefix(mp[cp], pprefix);
7179:       mp[cp]->product->api_user = product->api_user;
7180:       MatProductSetFromOptions(mp[cp]);
7181:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
7182:       rmapt[cp] = 2;
7183:       rmapa[cp] = globidx;
7184:       cmapt[cp] = 2;
7185:       cmapa[cp] = P_oth_idx;
7186:       mptmp[cp] = PETSC_FALSE;
7187:       cp++;
7188:     }
7189:     break;
7190:   default:
7191:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7192:   }
7193:   /* sanity check */
7194:   if (size > 1)

7197:   PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp);
7198:   for (i = 0; i < cp; i++) {
7199:     mmdata->mp[i]    = mp[i];
7200:     mmdata->mptmp[i] = mptmp[i];
7201:   }
7202:   mmdata->cp             = cp;
7203:   C->product->data       = mmdata;
7204:   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7205:   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

7207:   /* memory type */
7208:   mmdata->mtype = PETSC_MEMTYPE_HOST;
7209:   PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");
7210:   PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "");
7211:   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7212:   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

7214:   /* prepare coo coordinates for values insertion */

7216:   /* count total nonzeros of those intermediate seqaij Mats
7217:     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7218:     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs for insertion
7219:     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7220:   */
7221:   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7222:     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7223:     if (mptmp[cp]) continue;
7224:     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7225:       const PetscInt *rmap = rmapa[cp];
7226:       const PetscInt  mr   = mp[cp]->rmap->n;
7227:       const PetscInt  rs   = C->rmap->rstart;
7228:       const PetscInt  re   = C->rmap->rend;
7229:       const PetscInt *ii   = mm->i;
7230:       for (i = 0; i < mr; i++) {
7231:         const PetscInt gr = rmap[i];
7232:         const PetscInt nz = ii[i + 1] - ii[i];
7233:         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7234:         else ncoo_oown += nz;                  /* this row is local */
7235:       }
7236:     } else ncoo_d += mm->nz;
7237:   }

7239:   /*
7240:     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

7242:     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other procs.

7244:     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

7246:     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7247:     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7248:     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

7250:     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7251:     E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the i of local nonzeros, and the remaining part stores the i of nonzeros this process will receive.
7252:   */
7253:   PetscCalloc1(mmdata->cp + 1, &mmdata->off); /* +1 to make a csr-like data structure */
7254:   PetscCalloc1(mmdata->cp + 1, &mmdata->own);

7256:   /* gather (i,j) of nonzeros inserted by remote procs */
7257:   if (hasoffproc) {
7258:     PetscSF  msf;
7259:     PetscInt ncoo2, *coo_i2, *coo_j2;

7261:     PetscMalloc1(ncoo_o, &mmdata->off[0]);
7262:     PetscMalloc1(ncoo_oown, &mmdata->own[0]);
7263:     PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j); /* to collect (i,j) of entries to be sent to others */

7265:     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7266:       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7267:       PetscInt   *idxoff = mmdata->off[cp];
7268:       PetscInt   *idxown = mmdata->own[cp];
7269:       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7270:         const PetscInt *rmap = rmapa[cp];
7271:         const PetscInt *cmap = cmapa[cp];
7272:         const PetscInt *ii   = mm->i;
7273:         PetscInt       *coi  = coo_i + ncoo_o;
7274:         PetscInt       *coj  = coo_j + ncoo_o;
7275:         const PetscInt  mr   = mp[cp]->rmap->n;
7276:         const PetscInt  rs   = C->rmap->rstart;
7277:         const PetscInt  re   = C->rmap->rend;
7278:         const PetscInt  cs   = C->cmap->rstart;
7279:         for (i = 0; i < mr; i++) {
7280:           const PetscInt *jj = mm->j + ii[i];
7281:           const PetscInt  gr = rmap[i];
7282:           const PetscInt  nz = ii[i + 1] - ii[i];
7283:           if (gr < rs || gr >= re) { /* this is an offproc row */
7284:             for (j = ii[i]; j < ii[i + 1]; j++) {
7285:               *coi++    = gr;
7286:               *idxoff++ = j;
7287:             }
7288:             if (!cmapt[cp]) { /* already global */
7289:               for (j = 0; j < nz; j++) *coj++ = jj[j];
7290:             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7291:               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7292:             } else { /* offdiag */
7293:               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7294:             }
7295:             ncoo_o += nz;
7296:           } else { /* this is a local row */
7297:             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7298:           }
7299:         }
7300:       }
7301:       mmdata->off[cp + 1] = idxoff;
7302:       mmdata->own[cp + 1] = idxown;
7303:     }

7305:     PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf);
7306:     PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i);
7307:     PetscSFGetMultiSF(mmdata->sf, &msf);
7308:     PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL);
7309:     ncoo = ncoo_d + ncoo_oown + ncoo2;
7310:     PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2);
7311:     PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown); /* put (i,j) of remote nonzeros at back */
7312:     PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown);
7313:     PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown);
7314:     PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown);
7315:     PetscFree2(coo_i, coo_j);
7316:     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7317:     PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w);
7318:     coo_i = coo_i2;
7319:     coo_j = coo_j2;
7320:   } else { /* no offproc values insertion */
7321:     ncoo = ncoo_d;
7322:     PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j);

7324:     PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf);
7325:     PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
7326:     PetscSFSetUp(mmdata->sf);
7327:   }
7328:   mmdata->hasoffproc = hasoffproc;

7330:   /* gather (i,j) of nonzeros inserted locally */
7331:   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7332:     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7333:     PetscInt       *coi  = coo_i + ncoo_d;
7334:     PetscInt       *coj  = coo_j + ncoo_d;
7335:     const PetscInt *jj   = mm->j;
7336:     const PetscInt *ii   = mm->i;
7337:     const PetscInt *cmap = cmapa[cp];
7338:     const PetscInt *rmap = rmapa[cp];
7339:     const PetscInt  mr   = mp[cp]->rmap->n;
7340:     const PetscInt  rs   = C->rmap->rstart;
7341:     const PetscInt  re   = C->rmap->rend;
7342:     const PetscInt  cs   = C->cmap->rstart;

7344:     if (mptmp[cp]) continue;
7345:     if (rmapt[cp] == 1) { /* consecutive rows */
7346:       /* fill coo_i */
7347:       for (i = 0; i < mr; i++) {
7348:         const PetscInt gr = i + rs;
7349:         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7350:       }
7351:       /* fill coo_j */
7352:       if (!cmapt[cp]) { /* type-0, already global */
7353:         PetscArraycpy(coj, jj, mm->nz);
7354:       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7355:         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7356:       } else {                                            /* type-2, local to global for sparse columns */
7357:         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7358:       }
7359:       ncoo_d += mm->nz;
7360:     } else if (rmapt[cp] == 2) { /* sparse rows */
7361:       for (i = 0; i < mr; i++) {
7362:         const PetscInt *jj = mm->j + ii[i];
7363:         const PetscInt  gr = rmap[i];
7364:         const PetscInt  nz = ii[i + 1] - ii[i];
7365:         if (gr >= rs && gr < re) { /* local rows */
7366:           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7367:           if (!cmapt[cp]) { /* type-0, already global */
7368:             for (j = 0; j < nz; j++) *coj++ = jj[j];
7369:           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7370:             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7371:           } else { /* type-2, local to global for sparse columns */
7372:             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7373:           }
7374:           ncoo_d += nz;
7375:         }
7376:       }
7377:     }
7378:   }
7379:   if (glob) ISRestoreIndices(glob, &globidx);
7380:   ISDestroy(&glob);
7381:   if (P_oth_l2g) ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx);
7382:   ISLocalToGlobalMappingDestroy(&P_oth_l2g);
7383:   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7384:   PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v);

7386:   /* preallocate with COO data */
7387:   MatSetPreallocationCOO(C, ncoo, coo_i, coo_j);
7388:   PetscFree2(coo_i, coo_j);
7389:   return 0;
7390: }
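
/*
   Usage sketch (illustrative only; not part of the library source): the symbolic/numeric split implemented
   above is what users drive through the MatProduct API. Assuming A and P are existing MPIAIJ (or
   device-subclass) matrices, a typical sequence is

     Mat C;

     MatProductCreate(A, P, NULL, &C);
     MatProductSetType(C, MATPRODUCT_PtAP);   // or MATPRODUCT_AB, MATPRODUCT_AtB
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);                   // builds the intermediate products and the COO pattern once
     MatProductNumeric(C);                    // fills the values; may be repeated after the values of A or P change
     MatDestroy(&C);

   The convenience interfaces MatMatMult(), MatTransposeMatMult() and MatPtAP() go through the same steps.
*/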

7392: PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7393: {
7394:   Mat_Product *product = mat->product;
7395: #if defined(PETSC_HAVE_DEVICE)
7396:   PetscBool match  = PETSC_FALSE;
7397:   PetscBool usecpu = PETSC_FALSE;
7398: #else
7399:   PetscBool match = PETSC_TRUE;
7400: #endif

7402:   MatCheckProduct(mat, 1);
7403: #if defined(PETSC_HAVE_DEVICE)
7404:   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match);
7405:   if (match) { /* we can always fall back to the CPU if requested */
7406:     switch (product->type) {
7407:     case MATPRODUCT_AB:
7408:       if (product->api_user) {
7409:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7410:         PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL);
7411:         PetscOptionsEnd();
7412:       } else {
7413:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7414:         PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL);
7415:         PetscOptionsEnd();
7416:       }
7417:       break;
7418:     case MATPRODUCT_AtB:
7419:       if (product->api_user) {
7420:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7421:         PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL);
7422:         PetscOptionsEnd();
7423:       } else {
7424:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7425:         PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL);
7426:         PetscOptionsEnd();
7427:       }
7428:       break;
7429:     case MATPRODUCT_PtAP:
7430:       if (product->api_user) {
7431:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7432:         PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL);
7433:         PetscOptionsEnd();
7434:       } else {
7435:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7436:         PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL);
7437:         PetscOptionsEnd();
7438:       }
7439:       break;
7440:     default:
7441:       break;
7442:     }
7443:     match = (PetscBool)!usecpu;
7444:   }
7445: #endif
7446:   if (match) {
7447:     switch (product->type) {
7448:     case MATPRODUCT_AB:
7449:     case MATPRODUCT_AtB:
7450:     case MATPRODUCT_PtAP:
7451:       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7452:       break;
7453:     default:
7454:       break;
7455:     }
7456:   }
7457:   /* fallback to MPIAIJ ops */
7458:   if (!mat->ops->productsymbolic) MatProductSetFromOptions_MPIAIJ(mat);
7459:   return 0;
7460: }
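
/*
   Usage sketch (illustrative only; not part of the library source): with a device build, the CPU fallback
   selected above can be requested through the options database, e.g. -matmatmult_backend_cpu,
   -mattransposematmult_backend_cpu, -matptap_backend_cpu, or -mat_product_algorithm_backend_cpu for the
   generic MatProduct API. Assuming the matrices carry no options prefix (A, B, C are placeholders here),
   the same can be done in code:

     PetscOptionsSetValue(NULL, "-matmatmult_backend_cpu", "1");
     MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C);
*/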

7462: /*
7463:    Produces a set of block column indices of the matrix row, one for each block represented in the original row

7465:    n - the number of block indices in cc[]
7466:    cc - the block indices (must be large enough to contain the indices)
7467: */
7468: static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7469: {
7470:   PetscInt        cnt = -1, nidx, j;
7471:   const PetscInt *idx;

7473:   MatGetRow(Amat, row, &nidx, &idx, NULL);
7474:   if (nidx) {
7475:     cnt     = 0;
7476:     cc[cnt] = idx[0] / bs;
7477:     for (j = 1; j < nidx; j++) {
7478:       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7479:     }
7480:   }
7481:   MatRestoreRow(Amat, row, &nidx, &idx, NULL);
7482:   *n = cnt + 1;
7483:   return 0;
7484: }

7486: /*
7487:     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

7489:     ncollapsed - the number of block indices
7490:     collapsed - the block indices (must be large enough to contain the indices)
7491: */
7492: static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7493: {
7494:   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

7496:   MatCollapseRow(Amat, start, bs, &nprev, cprev);
7497:   for (i = start + 1; i < start + bs; i++) {
7498:     MatCollapseRow(Amat, i, bs, &ncur, ccur);
7499:     PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged);
7500:     cprevtmp = cprev;
7501:     cprev    = merged;
7502:     merged   = cprevtmp;
7503:   }
7504:   *ncollapsed = nprev;
7505:   if (collapsed) *collapsed = cprev;
7506:   return 0;
7507: }
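
/*
   Worked example (illustrative): with bs = 3, a scalar row whose (sorted) column indices are
   {0, 1, 2, 6, 7, 8} collapses to the block column indices {0, 2}, since idx[j]/bs maps {0,1,2} to 0 and
   {6,7,8} to 2 and MatCollapseRow() records each block index only once. MatCollapseRows() then merges the
   collapsed index sets of the bs scalar rows of one block row with PetscMergeIntArray(), cycling the work
   arrays w0/w1/w2 as scratch space.
*/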

7509: /*
7510:    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7511: */
7512: static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7513: {
7514:   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7515:   Mat                tGmat;
7516:   MPI_Comm           comm;
7517:   const PetscScalar *vals;
7518:   const PetscInt    *idx;
7519:   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7520:   MatScalar         *AA; // this is checked in graph
7521:   PetscBool          isseqaij;
7522:   Mat                a, b, c;
7523:   MatType            jtype;

7525:   PetscObjectGetComm((PetscObject)Gmat, &comm);
7526:   PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij);
7527:   MatGetType(Gmat, &jtype);
7528:   MatCreate(comm, &tGmat);
7529:   MatSetType(tGmat, jtype);

7531:   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7532:                Also, if the matrix is symmetric, can we skip this
7533:                operation? It can be very expensive on large matrices. */

7535:   // global sizes
7536:   MatGetSize(Gmat, &MM, &NN);
7537:   MatGetOwnershipRange(Gmat, &Istart, &Iend);
7538:   nloc = Iend - Istart;
7539:   PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz);
7540:   if (isseqaij) {
7541:     a = Gmat;
7542:     b = NULL;
7543:   } else {
7544:     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7545:     a             = d->A;
7546:     b             = d->B;
7547:     garray        = d->garray;
7548:   }
7549:   /* Determine upper bound on non-zeros needed in new filtered matrix */
7550:   for (PetscInt row = 0; row < nloc; row++) {
7551:     MatGetRow(a, row, &ncols, NULL, NULL);
7552:     d_nnz[row] = ncols;
7553:     if (ncols > maxcols) maxcols = ncols;
7554:     MatRestoreRow(a, row, &ncols, NULL, NULL);
7555:   }
7556:   if (b) {
7557:     for (PetscInt row = 0; row < nloc; row++) {
7558:       MatGetRow(b, row, &ncols, NULL, NULL);
7559:       o_nnz[row] = ncols;
7560:       if (ncols > maxcols) maxcols = ncols;
7561:       MatRestoreRow(b, row, &ncols, NULL, NULL);
7562:     }
7563:   }
7564:   MatSetSizes(tGmat, nloc, nloc, MM, MM);
7565:   MatSetBlockSizes(tGmat, 1, 1);
7566:   MatSeqAIJSetPreallocation(tGmat, 0, d_nnz);
7567:   MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz);
7568:   MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
7569:   PetscFree2(d_nnz, o_nnz);
7570:   //
7571:   PetscMalloc2(maxcols, &AA, maxcols, &AJ);
7572:   nnz0 = nnz1 = 0;
7573:   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7574:     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7575:       MatGetRow(c, row, &ncols, &idx, &vals);
7576:       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7577:         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7578:         if (PetscRealPart(sv) > vfilter) {
7579:           nnz1++;
7580:           PetscInt cid = idx[jj] + Istart; //diag
7581:           if (c != a) cid = garray[idx[jj]];
7582:           AA[ncol_row] = vals[jj];
7583:           AJ[ncol_row] = cid;
7584:           ncol_row++;
7585:         }
7586:       }
7587:       MatRestoreRow(c, row, &ncols, &idx, &vals);
7588:       MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES);
7589:     }
7590:   }
7591:   PetscFree2(AA, AJ);
7592:   MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY);
7593:   MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY);
7594:   MatPropagateSymmetryOptions(Gmat, tGmat); /* Normal Mat options are not relevant ? */

7596:   PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols);

7598:   *filteredG = tGmat;
7599:   MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view");
7600:   return 0;
7601: }
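
/*
   Worked example (illustrative): with vfilter = 0.5, a row of Gmat with values {0.2, -0.7, 0.9} keeps only
   the entries -0.7 and 0.9 (absolute value strictly greater than 0.5), so the corresponding row of the
   filtered matrix has 2 nonzeros instead of 3. The preallocation computed above is therefore only an upper
   bound on the filtered row lengths.
*/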

7603: /*
7604:  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

7606:  Input Parameters:
7607:  + Amat - matrix
7608:  . symmetrize - make the result symmetric
7609:  . scale - scale with diagonal
7610:  - filter - drop threshold; graph entries not larger than it are removed (negative disables filtering)

7611:  Output Parameter:
7612:  . a_Gmat - output scalar graph (all entries are nonnegative)

7614:  */
7615: PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7616: {
7617:   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7618:   MPI_Comm  comm;
7619:   Mat       Gmat;
7620:   PetscBool ismpiaij, isseqaij;
7621:   Mat       a, b, c;
7622:   MatType   jtype;

7624:   PetscObjectGetComm((PetscObject)Amat, &comm);
7625:   MatGetOwnershipRange(Amat, &Istart, &Iend);
7626:   MatGetSize(Amat, &MM, &NN);
7627:   MatGetBlockSize(Amat, &bs);
7628:   nloc = (Iend - Istart) / bs;

7630:   PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij);
7631:   PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij);

7634:   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7635:   /* A solution is to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7636:      implementation */
7637:   if (bs > 1) {
7638:     MatGetType(Amat, &jtype);
7639:     MatCreate(comm, &Gmat);
7640:     MatSetType(Gmat, jtype);
7641:     MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE);
7642:     MatSetBlockSizes(Gmat, 1, 1);
7643:     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7644:       PetscInt  *d_nnz, *o_nnz;
7645:       MatScalar *aa, val, AA[4096];
7646:       PetscInt  *aj, *ai, AJ[4096], nc;
7647:       if (isseqaij) {
7648:         a = Amat;
7649:         b = NULL;
7650:       } else {
7651:         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7652:         a             = d->A;
7653:         b             = d->B;
7654:       }
7655:       PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc);
7656:       PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz);
7657:       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7658:         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7659:         const PetscInt *cols;
7660:         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7661:           MatGetRow(c, brow, &jj, &cols, NULL);
7662:           nnz[brow / bs] = jj / bs;
7663:           if (jj % bs) ok = 0;
7664:           if (cols) j0 = cols[0];
7665:           else j0 = -1;
7666:           MatRestoreRow(c, brow, &jj, &cols, NULL);
7667:           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7668:           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7669:             MatGetRow(c, brow + ii, &jj, &cols, NULL);
7670:             if (jj % bs) ok = 0;
7671:             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7672:             if (nnz[brow / bs] != jj / bs) ok = 0;
7673:             MatRestoreRow(c, brow + ii, &jj, &cols, NULL);
7674:           }
7675:           if (!ok) {
7676:             PetscFree2(d_nnz, o_nnz);
7677:             goto old_bs;
7678:           }
7679:         }
7681:       }
7682:       MatSeqAIJSetPreallocation(Gmat, 0, d_nnz);
7683:       MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz);
7684:       PetscFree2(d_nnz, o_nnz);
7685:       // diag
7686:       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7687:         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7688:         ai               = aseq->i;
7689:         n                = ai[brow + 1] - ai[brow];
7690:         aj               = aseq->j + ai[brow];
7691:         for (int k = 0; k < n; k += bs) {        // block columns
7692:           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7693:           val        = 0;
7694:           for (int ii = 0; ii < bs; ii++) { // rows in block
7695:             aa = aseq->a + ai[brow + ii] + k;
7696:             for (int jj = 0; jj < bs; jj++) {         // columns in block
7697:               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7698:             }
7699:           }
7700:           AA[k / bs] = val;
7701:         }
7702:         grow = Istart / bs + brow / bs;
7703:         MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES);
7704:       }
7705:       // off-diag
7706:       if (ismpiaij) {
7707:         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7708:         const PetscScalar *vals;
7709:         const PetscInt    *cols, *garray = aij->garray;
7711:         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7712:           MatGetRow(b, brow, &ncols, &cols, NULL);
7713:           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7714:             AA[k / bs] = 0;
7715:             AJ[cidx]   = garray[cols[k]] / bs;
7716:           }
7717:           nc = ncols / bs;
7718:           MatRestoreRow(b, brow, &ncols, &cols, NULL);
7719:           for (int ii = 0; ii < bs; ii++) { // rows in block
7720:             MatGetRow(b, brow + ii, &ncols, &cols, &vals);
7721:             for (int k = 0; k < ncols; k += bs) {
7722:               for (int jj = 0; jj < bs; jj++) { // cols in block
7723:                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7724:               }
7725:             }
7726:             MatRestoreRow(b, brow + ii, &ncols, &cols, &vals);
7727:           }
7728:           grow = Istart / bs + brow / bs;
7729:           MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES);
7730:         }
7731:       }
7732:       MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY);
7733:       MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY);
7734:     } else {
7735:       const PetscScalar *vals;
7736:       const PetscInt    *idx;
7737:       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7738:     old_bs:
7739:       /*
7740:        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7741:        */
7742:       PetscInfo(Amat, "OLD bs>1 CreateGraph\n");
7743:       PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz);
7744:       if (isseqaij) {
7745:         PetscInt max_d_nnz;
7746:         /*
7747:          Determine exact preallocation count for (sequential) scalar matrix
7748:          */
7749:         MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz);
7750:         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7751:         PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2);
7752:         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL);
7753:         PetscFree3(w0, w1, w2);
7754:       } else if (ismpiaij) {
7755:         Mat             Daij, Oaij;
7756:         const PetscInt *garray;
7757:         PetscInt        max_d_nnz;
7758:         MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray);
7759:         /*
7760:          Determine exact preallocation count for diagonal block portion of scalar matrix
7761:          */
7762:         MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz);
7763:         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7764:         PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2);
7765:         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL);
7766:         PetscFree3(w0, w1, w2);
7767:         /*
7768:          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
7769:          */
7770:         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7771:           o_nnz[jj] = 0;
7772:           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7773:             MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL);
7774:             o_nnz[jj] += ncols;
7775:             MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL);
7776:           }
7777:           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7778:         }
7779:       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7780:       /* get scalar copy (norms) of matrix */
7781:       MatSeqAIJSetPreallocation(Gmat, 0, d_nnz);
7782:       MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz);
7783:       PetscFree2(d_nnz, o_nnz);
7784:       for (Ii = Istart; Ii < Iend; Ii++) {
7785:         PetscInt dest_row = Ii / bs;
7786:         MatGetRow(Amat, Ii, &ncols, &idx, &vals);
7787:         for (jj = 0; jj < ncols; jj++) {
7788:           PetscInt    dest_col = idx[jj] / bs;
7789:           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7790:           MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES);
7791:         }
7792:         MatRestoreRow(Amat, Ii, &ncols, &idx, &vals);
7793:       }
7794:       MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY);
7795:       MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY);
7796:     }
7797:   } else {
7798:     if (symmetrize || filter >= 0 || scale) MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);
7799:     else {
7800:       Gmat = Amat;
7801:       PetscObjectReference((PetscObject)Gmat);
7802:     }
7803:     if (isseqaij) {
7804:       a = Gmat;
7805:       b = NULL;
7806:     } else {
7807:       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7808:       a             = d->A;
7809:       b             = d->B;
7810:     }
7811:     if (filter >= 0 || scale) {
7812:       /* take absolute value of each entry */
7813:       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7814:         MatInfo      info;
7815:         PetscScalar *avals;
7816:         MatGetInfo(c, MAT_LOCAL, &info);
7817:         MatSeqAIJGetArray(c, &avals);
7818:         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7819:         MatSeqAIJRestoreArray(c, &avals);
7820:       }
7821:     }
7822:   }
7823:   if (symmetrize) {
7824:     PetscBool isset, issym;
7825:     MatIsSymmetricKnown(Amat, &isset, &issym);
7826:     if (!isset || !issym) {
7827:       Mat matTrans;
7828:       MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);
7829:       MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN);
7830:       MatDestroy(&matTrans);
7831:     }
7832:     MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE);
7833:   } else if (Amat != Gmat) MatPropagateSymmetryOptions(Amat, Gmat);
7834:   if (scale) {
7835:     /* scale Gmat so that all diagonal values become +1 or -1 */
7836:     Vec diag;
7837:     MatCreateVecs(Gmat, &diag, NULL);
7838:     MatGetDiagonal(Gmat, diag);
7839:     VecReciprocal(diag);
7840:     VecSqrtAbs(diag);
7841:     MatDiagonalScale(Gmat, diag, diag);
7842:     VecDestroy(&diag);
7843:   }
7844:   MatViewFromOptions(Gmat, NULL, "-mat_graph_view");

7846:   if (filter >= 0) {
7847:     Mat Fmat = NULL; /* some silly compiler needs this */

7849:     MatFilter_AIJ(Gmat, filter, &Fmat);
7850:     MatDestroy(&Gmat);
7851:     Gmat = Fmat;
7852:   }
7853:   *a_Gmat = Gmat;
7854:   return 0;
7855: }
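
/*
   Usage sketch (illustrative only; not part of the library source): an internal caller (for example,
   coarsening or aggregation code) might use this routine as

     Mat G;

     MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.01, &G);   // symmetrize, scale, drop small entries
     MatDestroy(&G);                                                      // after G has been used

   where the 0.01 drop tolerance is made up for illustration; a negative filter value keeps all entries.
*/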

7857: /*
7858:     Special version for direct calls from Fortran
7859: */
7860: #include <petsc/private/fortranimpl.h>

7862: /* Change these macros so they can be used in a void function */
7863: /* Identical to PetscCallVoid, except it assigns to *_ierr */
7864: #undef PetscCall
7865: #define PetscCall(...) \
7866:   do { \
7867:     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7868:     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7869:       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7870:       return; \
7871:     } \
7872:   } while (0)

7874: #undef SETERRQ
7875: #define SETERRQ(comm, ierr, ...) \
7876:   do { \
7877:     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
7878:     return; \
7879:   } while (0)

7881: #if defined(PETSC_HAVE_FORTRAN_CAPS)
7882:   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7883: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7884:   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7885: #else
7886: #endif
7887: PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
7888: {
7889:   Mat         mat = *mmat;
7890:   PetscInt    m = *mm, n = *mn;
7891:   InsertMode  addv = *maddv;
7892:   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
7893:   PetscScalar value;

7895:   MatCheckPreallocated(mat, 1);
7896:   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7898:   {
7899:     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
7900:     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
7901:     PetscBool roworiented = aij->roworiented;

7903:     /* Some Variables required in the macro */
7904:     Mat         A     = aij->A;
7905:     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
7906:     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
7907:     MatScalar  *aa;
7908:     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7909:     Mat         B                 = aij->B;
7910:     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
7911:     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
7912:     MatScalar  *ba;
7913:     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7914:      * cannot use "#if defined" inside a macro. */
7915:     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

7917:     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
7918:     PetscInt   nonew = a->nonew;
7919:     MatScalar *ap1, *ap2;

7921:     MatSeqAIJGetArray(A, &aa);
7922:     MatSeqAIJGetArray(B, &ba);
7923:     for (i = 0; i < m; i++) {
7924:       if (im[i] < 0) continue;
7926:       if (im[i] >= rstart && im[i] < rend) {
7927:         row      = im[i] - rstart;
7928:         lastcol1 = -1;
7929:         rp1      = aj + ai[row];
7930:         ap1      = aa + ai[row];
7931:         rmax1    = aimax[row];
7932:         nrow1    = ailen[row];
7933:         low1     = 0;
7934:         high1    = nrow1;
7935:         lastcol2 = -1;
7936:         rp2      = bj + bi[row];
7937:         ap2      = ba + bi[row];
7938:         rmax2    = bimax[row];
7939:         nrow2    = bilen[row];
7940:         low2     = 0;
7941:         high2    = nrow2;

7943:         for (j = 0; j < n; j++) {
7944:           if (roworiented) value = v[i * n + j];
7945:           else value = v[i + j * m];
7946:           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7947:           if (in[j] >= cstart && in[j] < cend) {
7948:             col = in[j] - cstart;
7949:             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
7950:           } else if (in[j] < 0) continue;
7951:           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7952:             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7953:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
7954:           } else {
7955:             if (mat->was_assembled) {
7956:               if (!aij->colmap) MatCreateColmap_MPIAIJ_Private(mat);
7957: #if defined(PETSC_USE_CTABLE)
7958:               PetscTableFind(aij->colmap, in[j] + 1, &col);
7959:               col--;
7960: #else
7961:               col = aij->colmap[in[j]] - 1;
7962: #endif
7963:               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
7964:                 MatDisAssemble_MPIAIJ(mat);
7965:                 col = in[j];
7966:                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7967:                 B        = aij->B;
7968:                 b        = (Mat_SeqAIJ *)B->data;
7969:                 bimax    = b->imax;
7970:                 bi       = b->i;
7971:                 bilen    = b->ilen;
7972:                 bj       = b->j;
7973:                 rp2      = bj + bi[row];
7974:                 ap2      = ba + bi[row];
7975:                 rmax2    = bimax[row];
7976:                 nrow2    = bilen[row];
7977:                 low2     = 0;
7978:                 high2    = nrow2;
7979:                 bm       = aij->B->rmap->n;
7980:                 ba       = b->a;
7981:                 inserted = PETSC_FALSE;
7982:               }
7983:             } else col = in[j];
7984:             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
7985:           }
7986:         }
7987:       } else if (!aij->donotstash) {
7988:         if (roworiented) {
7989:           MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
7990:         } else {
7991:           MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
7992:         }
7993:       }
7994:     }
7995:     MatSeqAIJRestoreArray(A, &aa);
7996:     MatSeqAIJRestoreArray(B, &ba);
7997:   }
7998:   return;
7999: }

8001: /* Undefining these here since they were redefined from their original definition above! No
8002:  * other PETSc functions should be defined past this point, as it is impossible to recover the
8003:  * original definitions */
8004: #undef PetscCall
8005: #undef SETERRQ