Actual source code: mpibaij.c

  1: #include <../src/mat/impls/baij/mpi/mpibaij.h>

  3: #include <petsc/private/hashseti.h>
  4: #include <petscblaslapack.h>
  5: #include <petscsf.h>

  7: #if defined(PETSC_HAVE_HYPRE)
  8: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
  9: #endif

 11: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
 12: {
 13:   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
 14:   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
 15:   PetscScalar       *va, *vv;
 16:   Vec                vB, vA;
 17:   const PetscScalar *vb;

 19:   VecCreateSeq(PETSC_COMM_SELF, m, &vA);
 20:   MatGetRowMaxAbs(a->A, vA, idx);

 22:   VecGetArrayWrite(vA, &va);
 23:   if (idx) {
 24:     for (i = 0; i < m; i++) {
 25:       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
 26:     }
 27:   }

 29:   VecCreateSeq(PETSC_COMM_SELF, m, &vB);
 30:   PetscMalloc1(m, &idxb);
 31:   MatGetRowMaxAbs(a->B, vB, idxb);

 33:   VecGetArrayWrite(v, &vv);
 34:   VecGetArrayRead(vB, &vb);
 35:   for (i = 0; i < m; i++) {
 36:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
 37:       vv[i] = vb[i];
 38:       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
 39:     } else {
 40:       vv[i] = va[i];
 41:       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
 42:     }
 43:   }
 44:   VecRestoreArrayWrite(vA, &vv);
 45:   VecRestoreArrayWrite(vA, &va);
 46:   VecRestoreArrayRead(vB, &vb);
 47:   PetscFree(idxb);
 48:   VecDestroy(&vA);
 49:   VecDestroy(&vB);
 50:   return 0;
 51: }

/*
   Saves a copy of the numerical values of both sequential blocks (diagonal A
   and off-diagonal B) so that MatRetrieveValues_MPIBAIJ() can restore them.
*/
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  MatStoreValues(aij->A);
  MatStoreValues(aij->B);
  return 0;
}

/*
   Restores the numerical values of both sequential blocks previously saved
   with MatStoreValues_MPIBAIJ().
*/
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  MatRetrieveValues(aij->A);
  MatRetrieveValues(aij->B);
  return 0;
}

 71: /*
 72:      Local utility routine that creates a mapping from the global column
 73:    number to the local number in the off-diagonal part of the local
 74:    storage of the matrix.  This is done in a non scalable way since the
 75:    length of colmap equals the global matrix length.
 76: */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: storage scales with the local number of off-diagonal
     block columns rather than the global block dimension */
  PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap);
  for (i = 0; i < nbs; i++) PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES);
#else
  /* dense variant: one slot per global block column; entries are stored
     shifted by +1 so that a value of 0 means "column not present locally" */
  PetscCalloc1(baij->Nbs + 1, &baij->colmap);
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  return 0;
}

/*
   Inserts/adds one scalar at local point indices (row,col) into the diagonal
   SeqBAIJ block A.  Relies on the caller (MatSetValues_MPIBAIJ) having the
   working variables (rp, ap, aimax, ailen, brow, bcol, low, high, ...) in
   scope.  Searches the block row (binary search narrowed, then linear scan);
   if the block column is absent and insertion is allowed, reallocates via
   MatSeqXAIJReallocateAIJ and shifts later entries up to make room.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1); \
    PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1)); \
    PetscArrayzero(ap + bs2 * _i, bs2); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but targeting the
   off-diagonal SeqBAIJ block B (uses bj/bi/ba/bimax/bilen and b->nonew).
   Must be kept structurally in sync with the A variant above.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1); \
    PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1)); \
    PetscArrayzero(ap + bs2 * _i, bs2); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/*
   Sets/adds individual (point-indexed) values into an MPIBAIJ matrix.
   Locally-owned rows are routed to the diagonal block A (local columns) or
   the off-diagonal block B (remote columns, translated through colmap);
   rows owned by other processes are stashed for communication at assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored */
        } else {
          /* column in the off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
            /* translate the global block column to B's compressed numbering
               (colmap stores bs*local_block+1; 0 means not present) */
#if defined(PETSC_USE_CTABLE)
            PetscTableFind(baij->colmap, in[j] / bs + 1, &col);
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-process column: disassemble so B uses global columns */
              MatDisAssemble_MPIBAIJ(mat);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              col += in[j] % bs; /* add the point offset within the block */
            }
          } else col = in[j]; /* not yet assembled: B still uses global columns */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE);
        } else {
          MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE);
        }
      }
    }
  }
  return 0;
}

/*
   Inserts/adds one bs x bs block of values at block indices (row,col) of a
   SeqBAIJ matrix.  v holds bs*bs scalars; their layout is controlled by
   a->roworiented.  orow/ocol carry the original (global) indices for error
   reporting by the reallocation macro.  If the block column is not present
   and insertion is allowed, the row is grown via MatSeqXAIJReallocateAIJ.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search to narrow the range, then a short linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: combine values in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major; the stored block is column-major, hence the stride */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* insertion disabled: silently drop */
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscArraymove(rp + i + 1, rp + i, N - i + 1);
  PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  return 0;
}

334: /*
335:     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
336:     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
337: */
338: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
339: {
340:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
341:   const PetscScalar *value;
342:   MatScalar         *barray      = baij->barray;
343:   PetscBool          roworiented = baij->roworiented;
344:   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
345:   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
346:   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

348:   if (!barray) {
349:     PetscMalloc1(bs2, &barray);
350:     baij->barray = barray;
351:   }

353:   if (roworiented) stepval = (n - 1) * bs;
354:   else stepval = (m - 1) * bs;

356:   for (i = 0; i < m; i++) {
357:     if (im[i] < 0) continue;
359:     if (im[i] >= rstart && im[i] < rend) {
360:       row = im[i] - rstart;
361:       for (j = 0; j < n; j++) {
362:         /* If NumCol = 1 then a copy is not required */
363:         if ((roworiented) && (n == 1)) {
364:           barray = (MatScalar *)v + i * bs2;
365:         } else if ((!roworiented) && (m == 1)) {
366:           barray = (MatScalar *)v + j * bs2;
367:         } else { /* Here a copy is required */
368:           if (roworiented) {
369:             value = v + (i * (stepval + bs) + j) * bs;
370:           } else {
371:             value = v + (j * (stepval + bs) + i) * bs;
372:           }
373:           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
374:             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
375:             barray += bs;
376:           }
377:           barray -= bs2;
378:         }

380:         if (in[j] >= cstart && in[j] < cend) {
381:           col = in[j] - cstart;
382:           MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]);
383:         } else if (in[j] < 0) {
384:           continue;
385:         } else {
387:           if (mat->was_assembled) {
388:             if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);

390: #if defined(PETSC_USE_DEBUG)
391:   #if defined(PETSC_USE_CTABLE)
392:             {
393:               PetscInt data;
394:               PetscTableFind(baij->colmap, in[j] + 1, &data);
396:             }
397:   #else
399:   #endif
400: #endif
401: #if defined(PETSC_USE_CTABLE)
402:             PetscTableFind(baij->colmap, in[j] + 1, &col);
403:             col = (col - 1) / bs;
404: #else
405:             col = (baij->colmap[in[j]] - 1) / bs;
406: #endif
407:             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
408:               MatDisAssemble_MPIBAIJ(mat);
409:               col = in[j];
411:           } else col = in[j];
412:           MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]);
413:         }
414:       }
415:     } else {
417:       if (!baij->donotstash) {
418:         if (roworiented) {
419:           MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
420:         } else {
421:           MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
422:         }
423:       }
424:     }
425:   }
426:   return 0;
427: }

/* Multiplicative hashing: HASH_KEY is the golden-ratio fraction; HASH maps an
   integer key into [0,size) using the fractional part of key*HASH_KEY (tmp is
   a caller-supplied PetscReal temporary, evaluated as a comma expression). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   Hash-table variant of MatSetValues for MPIBAIJ: assumes the nonzero pattern
   is fixed and was recorded by MatCreateHashTable_MPIBAIJ_Private(), so each
   (block row, block column) is located by open-addressing lookup instead of a
   row search.  Off-process rows are stashed as in MatSetValues_MPIBAIJ.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* NOTE(review): debug-only validation was stripped from this rendering */
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1; /* +1 so key 0 means "empty slot" */
        h1  = HASH(size, key, tmp);

        idx = h1;
        /* linear probing from h1, wrapping once; debug build also counts probes */
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for communication during assembly */
      if (roworiented) {
        MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE);
      } else {
        MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  return 0;
}

/*
   Hash-table variant of MatSetValuesBlocked for MPIBAIJ: locates each bs x bs
   block by open-addressing lookup on the (block row, block column) key and
   writes the values directly at the stored address, honoring roworiented
   layout of v.  Off-process block rows are stashed in mat->bstash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* NOTE(review): debug-only validation was stripped from this rendering */
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1; /* +1 so key 0 means "empty slot" */
        h1  = HASH(size, key, tmp);

        idx = h1;
        /* linear probing from h1, wrapping once; debug build also counts probes */
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              /* stored block is column-major, input row is strided by bs */
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
        } else {
          MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  return 0;
}

/*
   Retrieves individual (point-indexed) values from an MPIBAIJ matrix.
   Only locally-owned rows are supported; columns outside the diagonal block
   are translated through colmap, and entries not stored locally return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j);
        } else {
          if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
          /* translate the global block column to B's local numbering */
#if defined(PETSC_USE_CTABLE)
          PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data);
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* block not stored locally => the entry is a structural zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  return 0;
}

/*
   Computes a norm of an MPIBAIJ matrix.  Supports NORM_FROBENIUS,
   NORM_1 (max column sum; allocates two work arrays of global column length,
   so not scalable in the number of columns) and NORM_INFINITY (max row sum).
   On a single process, delegates to the sequential diagonal block.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  if (baij->size == 1) {
    MatNorm(baij->A, type, nrm);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take sqrt */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCalloc1(mat->cmap->N, &tmp);
      PetscMalloc1(mat->cmap->N, &tmp2);
      /* accumulate |a_ij| per global column from the diagonal block */
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      /* ... and from the off-diagonal block (columns mapped via garray) */
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscFree(tmp);
      PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscMalloc1(bs, &sums);
      sum = 0.0;
      /* per block row: sum |a_ij| into the bs point-row accumulators */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat));
      PetscFree(sums);
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  return 0;
}

735: /*
736:   Creates the hash table, and sets the table
737:   This table is created only once.
  If new entries need to be added to the matrix
739:   then the hash table has to be destroyed and
740:   recreated.
741: */
/*
   Builds the open-addressing hash table used by the *_HT MatSetValues paths:
   for every stored block of A and B, records the key (block row, block
   column) and the address of the block's values.  Table size is factor times
   the number of stored blocks; collisions are resolved by linear probing.
   A key of 0 marks an empty slot, hence the +1 in the key encoding.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* collision count and longest probe sequence */
#endif

  if (baij->ht) return 0; /* table already built */

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht);
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probing: place the entry in the first empty slot after h1 */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* off-diagonal columns are global via garray */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max);
#endif
  return 0;
}

/*
   Begins assembly: starts scattering both the point stash and the block
   stash to their owning processes, and logs stash usage statistics.
   A no-op when stashing is disabled or no off-process entries were set.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  if (baij->donotstash || mat->nooffprocentries) return 0;

  MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
  MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs);
  MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
  PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
  MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs);
  PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
  return 0;
}

/* Complete assembly of an MPIBAIJ matrix:
   1) drain both stashes, inserting the received off-process values locally;
   2) assemble the sequential diagonal (A) and off-diagonal (B) blocks;
   3) coordinate disassembly across ranks so everyone can reassemble consistently;
   4) optionally switch MatSetValues() to the hash-table fast path;
   5) update the collective nonzero-state stamp. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point (scalar-entry) stash, one message at a time */
    while (1) {
      MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
        i = j;
      }
    }
    MatStashScatterEnd_Private(&mat->stash);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg);
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block occupies bs2 scalars, hence the val + i * bs2 stride */
        MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode);
        i = j;
      }
    }
    MatStashScatterEnd_Private(&mat->bstash);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  MatAssemblyBegin(baij->A, mode);
  MatAssemblyEnd(baij->A, mode);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* MPI_LAND over was_assembled: false on any rank means someone disassembled */
    MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat));
    if (mat->was_assembled && !other_disassembled) MatDisAssemble_MPIBAIJ(mat);
  }

  /* first final assembly: build the scatter context used by MatMult() et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) MatSetUpMultiply_MPIBAIJ(mat);
  MatAssemblyBegin(baij->B, mode);
  MatAssemblyEnd(baij->B, mode);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested the hash table (MAT_USE_HASH_TABLE) but it is not built yet:
     build it now and reroute MatSetValues()/MatSetValuesBlocked() to the HT path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work buffers; they are rebuilt on demand */
  PetscFree2(baij->rowvalues, baij->rowindices);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat));
  }
  return 0;
}

949: extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
950: #include <petscdraw.h>
/* View an MPIBAIJ matrix through an ASCII, draw, or socket viewer.
   Info-style ASCII formats are handled per-rank and return early; otherwise the
   entire matrix is gathered onto rank 0 (as a temporary MPIBAIJ with all rows on
   rank 0) and its local block is viewed there.  All ranks must participate in the
   gather/draw since the sub-viewer and graphics are collective. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
  if (iascii) {
    PetscViewerGetFormat(viewer, &format);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics; no gather required */
      MatInfo info;
      MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
      MatGetInfo(mat, MAT_LOCAL, &info);
      PetscViewerASCIIPushSynchronized(viewer);
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      MatGetInfo(baij->A, MAT_LOCAL, &info);
      PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
      MatGetInfo(baij->B, MAT_LOCAL, &info);
      PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used);
      PetscViewerFlush(viewer);
      PetscViewerASCIIPopSynchronized(viewer);
      PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n");
      VecScatterView(baij->Mvctx, viewer);
      return 0;
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs);
      return 0;
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      return 0;
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscViewerDrawGetDraw(viewer, 0, &draw);
    PetscDrawIsNull(draw, &isnull);
    if (isnull) return 0;
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    MatCreate(PetscObjectComm((PetscObject)mat), &A);
    if (rank == 0) {
      MatSetSizes(A, M, N, M, N);
    } else {
      MatSetSizes(A, 0, 0, M, N);
    }
    MatSetType(A, MATMPIBAIJ);
    MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL);
    MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscMalloc1(bs, &rvals);

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs global (point) row numbers of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time; a advances by bs */
          MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES);
          col++;
          a += bs;
        }
      }
    }
    PetscFree(rvals);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    PetscObjectGetName((PetscObject)mat, &matname);
    if (rank == 0) {
      /* on rank 0 the gathered matrix's diagonal block holds the whole matrix */
      PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname);
      MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer);
    }
    PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    PetscViewerFlush(viewer);
    MatDestroy(&A);
  }
  return 0;
}

1072: /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Write the matrix to a binary viewer in the PETSc binary (point-wise AIJ)
   format: header (classid, M, N, global nz), per-row lengths, column indices,
   then values.  Each block row is expanded into bs point rows, and within each
   point row the off-diagonal (B) columns left of the diagonal block, then the
   diagonal (A) columns, then the remaining B columns are emitted so indices are
   globally increasing. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscViewerSetUp(viewer);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point-wise nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] becomes the global nz on rank 0 (only rank 0 writes the header) */
  MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat));
  PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);

  /* fill in and store row lengths */
  PetscMalloc1(m, &rowlens);
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT);
  PetscFree(rowlens);

  /* fill in and store column indices */
  PetscMalloc1(nz, &colidxs);
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B columns that lie left of this rank's diagonal block */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* then the diagonal-block (A) columns, shifted to global numbering */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* then the remaining B columns (right of the diagonal block) */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT);
  PetscFree(colidxs);

  /* fill in and store nonzero values */
  PetscMalloc1(nz, &matvals);
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* blocks are stored column-oriented: entry (k,l) of block j is a[bs*(bs*j+l)+k] */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR);
  PetscFree(matvals);

  /* write block size option to the viewer's .info file */
  MatView_Binary_BlockSizes(mat, viewer);
  return 0;
}

1148: PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1149: {
1150:   PetscBool iascii, isdraw, issocket, isbinary;

1152:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1153:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1154:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
1155:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1156:   if (iascii || isdraw || issocket) {
1157:     MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer);
1158:   } else if (isbinary) MatView_MPIBAIJ_Binary(mat, viewer);
1159:   return 0;
1160: }

/* Destroy an MPIBAIJ matrix: free the stashes, the sequential A/B blocks, the
   column map and scatter machinery, hash-table and work arrays, then detach all
   composed methods so the object can be retyped cleanly. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  MatStashDestroy_Private(&mat->stash);
  MatStashDestroy_Private(&mat->bstash);
  MatDestroy(&baij->A);
  MatDestroy(&baij->B);
#if defined(PETSC_USE_CTABLE)
  PetscTableDestroy(&baij->colmap);
#else
  PetscFree(baij->colmap);
#endif
  PetscFree(baij->garray);
  VecDestroy(&baij->lvec);
  VecScatterDestroy(&baij->Mvctx);
  PetscFree2(baij->rowvalues, baij->rowindices); /* MatGetRow() work buffers */
  PetscFree(baij->barray);
  PetscFree2(baij->hd, baij->ht); /* hash-table storage */
  PetscFree(baij->rangebs);
  PetscFree(mat->data);

  /* clear the type name and composed methods so a subsequent MatSetType() starts fresh */
  PetscObjectChangeTypeName((PetscObject)mat, NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL);
#if defined(PETSC_HAVE_HYPRE)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL);
  return 0;
}

/* yy = A*xx.  Overlaps communication with computation: start scattering the
   needed off-process entries of xx into a->lvec, multiply by the local diagonal
   block while the scatter is in flight, then add the off-diagonal contribution. */
PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  /* NOTE(review): nt is fetched twice but never used here — presumably size
     compatibility checks on xx/yy were intended; confirm against upstream */
  VecGetLocalSize(xx, &nt);
  VecGetLocalSize(yy, &nt);
  VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
  (*a->A->ops->mult)(a->A, xx, yy);
  VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
  (*a->B->ops->multadd)(a->B, a->lvec, yy, yy);
  return 0;
}

/* zz = yy + A*xx, with the same communication/computation overlap as MatMult:
   the diagonal-block product accumulates into zz while the off-process pieces
   of xx arrive, then the off-diagonal block adds its contribution. */
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
  (*a->A->ops->multadd)(a->A, xx, yy, zz);
  VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);
  (*a->B->ops->multadd)(a->B, a->lvec, zz, zz);
  return 0;
}

/* yy = A^T * xx.  The off-diagonal transpose product lands in the local work
   vector and is then scattered back (reverse, additive) onto its owning ranks,
   combining with the local diagonal-block transpose product. */
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  /* do nondiagonal part */
  (*a->B->ops->multtranspose)(a->B, xx, a->lvec);
  /* do local part */
  (*a->A->ops->multtranspose)(a->A, xx, yy);
  /* add partial results together */
  VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE);
  return 0;
}

/* zz = yy + A^T * xx; same structure as MatMultTranspose_MPIBAIJ but the local
   transpose product accumulates into zz before the reverse scatter adds the
   off-diagonal contributions. */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  /* do nondiagonal part */
  (*a->B->ops->multtranspose)(a->B, xx, a->lvec);
  /* do local part */
  (*a->A->ops->multtransposeadd)(a->A, xx, yy, zz);
  /* add partial results together */
  VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE);
  return 0;
}

1259: /*
1260:   This only works correctly for square matrices where the subblock A->A is the
1261:    diagonal block
1262: */
1263: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1264: {
1266:   MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v);
1267:   return 0;
1268: }

1270: PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1271: {
1272:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1274:   MatScale(a->A, aa);
1275:   MatScale(a->B, aa);
1276:   return 0;
1277: }

/* Return one locally owned (point) row of the matrix in global column numbering.
   The row is assembled by merging the diagonal-block (A) entries with the
   off-diagonal (B) entries, which are split into those left of and right of the
   diagonal block so the merged output is sorted by global column.  The returned
   idx/v arrays point into per-matrix work buffers (mat->rowindices/rowvalues),
   so only one row may be held at a time; MatRestoreRow_MPIBAIJ() releases it. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  /* brend is currently unused here — presumably a row-in-range check was
     intended alongside brstart; confirm against upstream */
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* total block count of block row i across both A and B */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices);
  }
  lrow = row - brstart; /* convert global point row to local point row */

  /* only request from A/B the pieces (values and/or columns) the caller wants */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  (*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA);
  (*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries left of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the value pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  (*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA);
  (*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB);
  return 0;
}

1361: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1362: {
1363:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

1366:   baij->getrowactive = PETSC_FALSE;
1367:   return 0;
1368: }

1370: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1371: {
1372:   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

1374:   MatZeroEntries(l->A);
1375:   MatZeroEntries(l->B);
1376:   return 0;
1377: }

/* Gather matrix statistics (nonzero counts, memory, mallocs) by summing the
   local A and B blocks, then, for the global flavors, reducing across the
   communicator with MPI_MAX or MPI_SUM as requested. */
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  info->block_size = (PetscReal)matin->rmap->bs;

  MatGetInfo(A, MAT_LOCAL, &info);

  /* accumulate the five reducible fields from the diagonal block... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  MatGetInfo(B, MAT_LOCAL, &info);

  /* ...and add the off-diagonal block's contribution */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* Handle MatSetOption() for MPIBAIJ: structural options are forwarded to both
   sequential blocks, stash/hash-table options are recorded locally, and symmetry
   hints are accepted without action (the diagonal block inherits them elsewhere). */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pattern-related options apply to both local blocks */
    MatCheckPreallocated(A, 1);
    MatSetOption(a->A, op, flg);
    MatSetOption(a->B, op, flg);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    MatSetOption(a->A, op, flg);
    MatSetOption(a->B, op, flg);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table over-allocation factor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  return 0;
}

/* Form the transpose of an MPIBAIJ matrix by walking the local A and B blocks
   and inserting each bs x bs block column-by-column into the transposed global
   position of a new (or reused) matrix.  The target has no preallocation
   information, so insertion may malloc; MAT_INPLACE_MATRIX merges the result
   back into A via MatHeaderMerge. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    MatCreate(PetscObjectComm((PetscObject)A), &B);
    MatSetSizes(B, A->cmap->n, A->rmap->n, N, M); /* row/column layouts swapped */
    MatSetType(B, ((PetscObject)A)->type_name);
    /* Do not know preallocation information, but must set block size */
    MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscMalloc1(bs, &rvals);

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs global point rows of block row i; they become columns in B */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* one column of the block becomes one row of B; a advances by bs */
        MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES);

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs; /* global block column via garray */
      for (k = 0; k < bs; k++) {
        MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES);
        col++;
        a += bs;
      }
    }
  }
  PetscFree(rvals);
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else MatHeaderMerge(A, &B);
  return 0;
}

/* mat = diag(ll) * mat * diag(rr), either factor optional.  The right scaling
   of the off-diagonal block needs off-process entries of rr, so that scatter is
   started first and overlapped with the left scaling and the diagonal-block
   scaling. */
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  /* NOTE(review): s1/s2/s3 are fetched but not compared here — presumably
     ll/rr size-compatibility checks were intended; confirm against upstream */
  MatGetLocalSize(mat, &s2, &s3);
  if (rr) {
    VecGetLocalSize(rr, &s1);
    /* Overlap communication with computation. */
    VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD);
  }
  if (ll) {
    VecGetLocalSize(ll, &s1);
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD);
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  return 0;
}

1574: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1575: {
1576:   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1577:   PetscInt    *lrows;
1578:   PetscInt     r, len;
1579:   PetscBool    cong;

1581:   /* get locally owned rows */
1582:   MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows);
1583:   /* fix right hand side if needed */
1584:   if (x && b) {
1585:     const PetscScalar *xx;
1586:     PetscScalar       *bb;

1588:     VecGetArrayRead(x, &xx);
1589:     VecGetArray(b, &bb);
1590:     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1591:     VecRestoreArrayRead(x, &xx);
1592:     VecRestoreArray(b, &bb);
1593:   }

1595:   /* actually zap the local rows */
1596:   /*
1597:         Zero the required rows. If the "diagonal block" of the matrix
1598:      is square and the user wishes to set the diagonal we use separate
1599:      code so that MatSetValues() is not called for each diagonal allocating
1600:      new memory, thus calling lots of mallocs and slowing things down.

1602:   */
1603:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1604:   MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL);
1605:   MatHasCongruentLayouts(A, &cong);
1606:   if ((diag != 0.0) && cong) {
1607:     MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL);
1608:   } else if (diag != 0.0) {
1609:     MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL);
1611:        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1612:     for (r = 0; r < len; ++r) {
1613:       const PetscInt row = lrows[r] + A->rmap->rstart;
1614:       MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
1615:     }
1616:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
1617:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
1618:   } else {
1619:     MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL);
1620:   }
1621:   PetscFree(lrows);

1623:   /* only change matrix nonzero state if pattern was allowed to be changed */
1624:   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1625:     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1626:     MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A));
1627:   }
1628:   return 0;
1629: }

/* Zero both the rows and the columns listed in rows[] (global numbering),
   optionally placing diag on the diagonal and updating b for a known solution x.
   A PetscSF maps the caller-supplied global rows onto their owning ranks; the
   diagonal block is handled by MatZeroRowsColumns on l->A, while the
   off-diagonal block's rows are zeroed directly in its storage and its zeroed
   columns are found via a scattered 0/1 mask vector. */
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  /* Create SF where leaves are input rows and roots are owned rows */
  PetscMalloc1(n, &lrows);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 = row not selected */
  PetscMalloc1(N, &rrows);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscLayoutFindOwner(A->rmap, idx, &p);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscSFCreate(PetscObjectComm((PetscObject)A), &sf);
  PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
  /* Collect flags for rows to be zeroed */
  PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR);
  PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR);
  PetscSFDestroy(&sf);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  MatZeroRowsColumns(l->A, len, lrows, diag, x, b);
  /* handle off diagonal part of matrix */
  MatCreateVecs(A, &xmask, NULL);
  VecDuplicate(l->lvec, &lmask);
  /* build a 0/1 mask over columns: 1 where the column is being zeroed */
  VecGetArray(xmask, &bb);
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  VecRestoreArray(xmask, &bb);
  VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD);
  VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD);
  VecDestroy(&xmask);
  if (x) {
    /* need the off-process entries of x to correct b for the zeroed columns */
    VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD);
    VecGetArrayRead(l->lvec, &xx);
    VecGetArray(b, &bb);
  }
  VecGetArray(lmask, &mask);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* point entries in this point row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* blocks are column-oriented: stride bs between entries of one point row */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move known contribution to rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    VecRestoreArray(b, &bb);
    VecRestoreArrayRead(l->lvec, &xx);
  }
  VecRestoreArray(lmask, &mask);
  VecDestroy(&lmask);
  PetscFree(lrows);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A));
  }
  return 0;
}

1726: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1727: {
1728:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1730:   MatSetUnfactored(a->A);
1731:   return 0;
1732: }

1734: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

1736: PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1737: {
1738:   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1739:   Mat          a, b, c, d;
1740:   PetscBool    flg;

1742:   a = matA->A;
1743:   b = matA->B;
1744:   c = matB->A;
1745:   d = matB->B;

1747:   MatEqual(a, c, &flg);
1748:   if (flg) MatEqual(b, d, &flg);
1749:   MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
1750:   return 0;
1751: }

1753: PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1754: {
1755:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1756:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

1758:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1759:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1760:     MatCopy_Basic(A, B, str);
1761:   } else {
1762:     MatCopy(a->A, b->A, str);
1763:     MatCopy(a->B, b->B, str);
1764:   }
1765:   PetscObjectStateIncrease((PetscObject)B);
1766:   return 0;
1767: }

/* Default setup: preallocates with PETSC_DEFAULT per-row block counts,
   using the block size already stored in the row layout. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL);
  return 0;
}

1775: PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1776: {
1777:   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1778:   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1779:   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

1781:   MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz);
1782:   return 0;
1783: }

/* Y = a*X + Y for MPIBAIJ matrices.

   Three cases depending on the declared nonzero-structure relation:
   - SAME_NONZERO_PATTERN: the stored values line up one-to-one, so a raw
     BLAS axpy over the flat value arrays of the diagonal and off-diagonal
     blocks suffices.
   - SUBSET_NONZERO_PATTERN: defer to the generic MatAXPY_Basic().
   - otherwise: build a new matrix B preallocated for the union pattern,
     accumulate into it, then replace Y's internals via MatHeaderMerge(). */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts blocks, so the flat value array holds nz*bs2 scalars */
    PetscBLASIntCast(x->nz * bs2, &bnz);
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscBLASIntCast(x->nz * bs2, &bnz);
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscObjectStateIncrease((PetscObject)Y);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    MatAXPY_Basic(Y, a, X, str);
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscMalloc1(yy->A->rmap->N, &nnz_d);
    PetscMalloc1(yy->B->rmap->N, &nnz_o);
    MatCreate(PetscObjectComm((PetscObject)Y), &B);
    PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name);
    MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N);
    MatSetBlockSizesFromMats(B, Y, Y);
    MatSetType(B, MATMPIBAIJ);
    MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d);
    MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o);
    MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    MatAXPY_BasicWithPreallocation(B, Y, a, X, str);
    /* Y takes over B's data while keeping its own PetscObject identity; B is consumed */
    MatHeaderMerge(Y, &B);
    PetscFree(nnz_d);
    PetscFree(nnz_o);
  }
  return 0;
}

1827: PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1828: {
1829:   if (PetscDefined(USE_COMPLEX)) {
1830:     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

1832:     MatConjugate_SeqBAIJ(a->A);
1833:     MatConjugate_SeqBAIJ(a->B);
1834:   }
1835:   return 0;
1836: }

1838: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1839: {
1840:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1842:   MatRealPart(a->A);
1843:   MatRealPart(a->B);
1844:   return 0;
1845: }

1847: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1848: {
1849:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1851:   MatImaginaryPart(a->A);
1852:   MatImaginaryPart(a->B);
1853:   return 0;
1854: }

1856: PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1857: {
1858:   IS       iscol_local;
1859:   PetscInt csize;

1861:   ISGetLocalSize(iscol, &csize);
1862:   if (call == MAT_REUSE_MATRIX) {
1863:     PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local);
1865:   } else {
1866:     ISAllGather(iscol, &iscol_local);
1867:   }
1868:   MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat);
1869:   if (call == MAT_INITIAL_MATRIX) {
1870:     PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local);
1871:     ISDestroy(&iscol_local);
1872:   }
1873:   return 0;
1874: }

/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ
  in local, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for both BAIJ and SBAIJ matrices (unfortunate dependency).
*/
/*
  MatCreateSubMatrix_MPIBAIJ_Private - extracts the parallel submatrix
  mat[isrow, iscol] in two stages: each rank first gathers its selected
  rows into a sequential matrix (Mreuse) via
  MatCreateSubMatrices_MPIBAIJ_local(), then the local pieces are inserted
  block-row by block-row into the distributed result with
  MatSetValuesBlocked_MPIBAIJ().

  csize is the local column size of the result, or PETSC_DECIDE.
  On MAT_INITIAL_MATRIX the intermediate Mreuse is composed on the result
  under the key "SubMatrix" so MAT_REUSE_MATRIX calls can recover it.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscObjectGetComm((PetscObject)mat, &comm);
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point-index ISs to block-index ISs (each run of bs indices -> one block index) */
  ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new);
  ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential gather matrix composed on *newmat by a prior call */
    PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse);
    MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse);
  } else {
    MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse);
  }
  ISDestroy(&isrow_new);
  ISDestroy(&iscol_new);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  MatGetBlockSize(mat, &bs);
  MatGetSize(Mreuse, &m, &n);
  /* convert point counts to block counts */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ISGetSize(isrow, &mglobal);
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix sum of the local column counts gives this rank's column range */
    MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm);
    rstart = rend - nlocal;

    /* next, compute all the lengths */
    PetscMalloc2(m + 1, &dlens, m + 1, &olens);
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    MatCreate(comm, &M);
    MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n);
    MatSetType(M, ((PetscObject)mat)->type_name);
    /* only the call matching M's actual type takes effect; the other is a no-op
       (this routine serves both BAIJ and SBAIJ) */
    MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens);
    MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens);
    PetscFree2(dlens, olens);
  } else {
    PetscInt ml, nl;

    M = *newmat;
    MatGetLocalSize(M, &ml, &nl);
    MatZeroEntries(M);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE);
  MatGetOwnershipRange(M, &rstart, &rend);
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk Mreuse row by row, handing each block row to the distributed matrix */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs; /* values advance by whole bs*bs blocks */
    MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES);
  }

  MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse);
    PetscObjectDereference((PetscObject)Mreuse);
  }
  return 0;
}

2007: PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2008: {
2009:   MPI_Comm        comm, pcomm;
2010:   PetscInt        clocal_size, nrows;
2011:   const PetscInt *rows;
2012:   PetscMPIInt     size;
2013:   IS              crowp, lcolp;

2015:   PetscObjectGetComm((PetscObject)A, &comm);
2016:   /* make a collective version of 'rowp' */
2017:   PetscObjectGetComm((PetscObject)rowp, &pcomm);
2018:   if (pcomm == comm) {
2019:     crowp = rowp;
2020:   } else {
2021:     ISGetSize(rowp, &nrows);
2022:     ISGetIndices(rowp, &rows);
2023:     ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp);
2024:     ISRestoreIndices(rowp, &rows);
2025:   }
2026:   ISSetPermutation(crowp);
2027:   /* make a local version of 'colp' */
2028:   PetscObjectGetComm((PetscObject)colp, &pcomm);
2029:   MPI_Comm_size(pcomm, &size);
2030:   if (size == 1) {
2031:     lcolp = colp;
2032:   } else {
2033:     ISAllGather(colp, &lcolp);
2034:   }
2035:   ISSetPermutation(lcolp);
2036:   /* now we just get the submatrix */
2037:   MatGetLocalSize(A, NULL, &clocal_size);
2038:   MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B);
2039:   /* clean up */
2040:   if (pcomm != comm) ISDestroy(&crowp);
2041:   if (size > 1) ISDestroy(&lcolp);
2042:   return 0;
2043: }

2045: PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2046: {
2047:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2048:   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

2050:   if (nghosts) *nghosts = B->nbs;
2051:   if (ghosts) *ghosts = baij->garray;
2052:   return 0;
2053: }

2055: PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2056: {
2057:   Mat          B;
2058:   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2059:   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2060:   Mat_SeqAIJ  *b;
2061:   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2062:   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2063:   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

2065:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
2066:   MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank);

2068:   /* ----------------------------------------------------------------
2069:      Tell every processor the number of nonzeros per row
2070:   */
2071:   PetscMalloc1(A->rmap->N / bs, &lens);
2072:   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2073:   PetscMalloc1(2 * size, &recvcounts);
2074:   displs = recvcounts + size;
2075:   for (i = 0; i < size; i++) {
2076:     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2077:     displs[i]     = A->rmap->range[i] / bs;
2078:   }
2079:   MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A));
2080:   /* ---------------------------------------------------------------
2081:      Create the sequential matrix of the same type as the local block diagonal
2082:   */
2083:   MatCreate(PETSC_COMM_SELF, &B);
2084:   MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE);
2085:   MatSetType(B, MATSEQAIJ);
2086:   MatSeqAIJSetPreallocation(B, 0, lens);
2087:   b = (Mat_SeqAIJ *)B->data;

2089:   /*--------------------------------------------------------------------
2090:     Copy my part of matrix column indices over
2091:   */
2092:   sendcount  = ad->nz + bd->nz;
2093:   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2094:   a_jsendbuf = ad->j;
2095:   b_jsendbuf = bd->j;
2096:   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2097:   cnt        = 0;
2098:   for (i = 0; i < n; i++) {
2099:     /* put in lower diagonal portion */
2100:     m = bd->i[i + 1] - bd->i[i];
2101:     while (m > 0) {
2102:       /* is it above diagonal (in bd (compressed) numbering) */
2103:       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2104:       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2105:       m--;
2106:     }

2108:     /* put in diagonal portion */
2109:     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

2111:     /* put in upper diagonal portion */
2112:     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2113:   }

2116:   /*--------------------------------------------------------------------
2117:     Gather all column indices to all processors
2118:   */
2119:   for (i = 0; i < size; i++) {
2120:     recvcounts[i] = 0;
2121:     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2122:   }
2123:   displs[0] = 0;
2124:   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2125:   MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A));
2126:   /*--------------------------------------------------------------------
2127:     Assemble the matrix into useable form (note numerical values not yet set)
2128:   */
2129:   /* set the b->ilen (length of each row) values */
2130:   PetscArraycpy(b->ilen, lens, A->rmap->N / bs);
2131:   /* set the b->i indices */
2132:   b->i[0] = 0;
2133:   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2134:   PetscFree(lens);
2135:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2136:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2137:   PetscFree(recvcounts);

2139:   MatPropagateSymmetryOptions(A, B);
2140:   *newmat = B;
2141:   return 0;
2142: }

/* SOR relaxation for MPIBAIJ.  Only the "local" variants are supported:
   each outer iteration scatters the current solution into the ghost vector,
   forms bb1 = bb - B*x (the rhs with off-process contributions removed),
   and then runs the chosen sweep of the sequential SOR on the diagonal
   block.  True parallel (global ordering) SOR is not implemented. */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  if (flag == SOR_APPLY_UPPER) {
    /* purely local operation: apply directly on the diagonal block */
    (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
    return 0;
  }

  /* bb1 is needed whenever a non-trivial initial guess is used or more than
     one outer iteration runs; note `~flag & SOR_ZERO_INITIAL_GUESS` tests
     that the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) VecDuplicate(bb, &bb1);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since x starts at zero */
      (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
      its--;
    }

    while (its--) {
      VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

      /* update rhs: bb1 = bb - B*x */
      VecScale(mat->lvec, -1.0);
      (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

      /* local sweep */
      (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
      its--;
    }
    while (its--) {
      VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

      /* update rhs: bb1 = bb - B*x */
      VecScale(mat->lvec, -1.0);
      (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

      /* local sweep */
      (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      (*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx);
      its--;
    }
    while (its--) {
      VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD);

      /* update rhs: bb1 = bb - B*x */
      VecScale(mat->lvec, -1.0);
      (*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1);

      /* local sweep */
      (*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  VecDestroy(&bb1);
  return 0;
}

2211: PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2212: {
2213:   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2214:   PetscInt     m, N, i, *garray = aij->garray;
2215:   PetscInt     ib, jb, bs = A->rmap->bs;
2216:   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2217:   MatScalar   *a_val = a_aij->a;
2218:   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2219:   MatScalar   *b_val = b_aij->a;
2220:   PetscReal   *work;

2222:   MatGetSize(A, &m, &N);
2223:   PetscCalloc1(N, &work);
2224:   if (type == NORM_2) {
2225:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2226:       for (jb = 0; jb < bs; jb++) {
2227:         for (ib = 0; ib < bs; ib++) {
2228:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2229:           a_val++;
2230:         }
2231:       }
2232:     }
2233:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2234:       for (jb = 0; jb < bs; jb++) {
2235:         for (ib = 0; ib < bs; ib++) {
2236:           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2237:           b_val++;
2238:         }
2239:       }
2240:     }
2241:   } else if (type == NORM_1) {
2242:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2243:       for (jb = 0; jb < bs; jb++) {
2244:         for (ib = 0; ib < bs; ib++) {
2245:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2246:           a_val++;
2247:         }
2248:       }
2249:     }
2250:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2251:       for (jb = 0; jb < bs; jb++) {
2252:         for (ib = 0; ib < bs; ib++) {
2253:           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2254:           b_val++;
2255:         }
2256:       }
2257:     }
2258:   } else if (type == NORM_INFINITY) {
2259:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2260:       for (jb = 0; jb < bs; jb++) {
2261:         for (ib = 0; ib < bs; ib++) {
2262:           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2263:           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2264:           a_val++;
2265:         }
2266:       }
2267:     }
2268:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2269:       for (jb = 0; jb < bs; jb++) {
2270:         for (ib = 0; ib < bs; ib++) {
2271:           int col   = garray[b_aij->j[i]] * bs + jb;
2272:           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2273:           b_val++;
2274:         }
2275:       }
2276:     }
2277:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2278:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2279:       for (jb = 0; jb < bs; jb++) {
2280:         for (ib = 0; ib < bs; ib++) {
2281:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2282:           a_val++;
2283:         }
2284:       }
2285:     }
2286:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2287:       for (jb = 0; jb < bs; jb++) {
2288:         for (ib = 0; ib < bs; ib++) {
2289:           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2290:           b_val++;
2291:         }
2292:       }
2293:     }
2294:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2295:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2296:       for (jb = 0; jb < bs; jb++) {
2297:         for (ib = 0; ib < bs; ib++) {
2298:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2299:           a_val++;
2300:         }
2301:       }
2302:     }
2303:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2304:       for (jb = 0; jb < bs; jb++) {
2305:         for (ib = 0; ib < bs; ib++) {
2306:           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2307:           b_val++;
2308:         }
2309:       }
2310:     }
2311:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2312:   if (type == NORM_INFINITY) {
2313:     MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A));
2314:   } else {
2315:     MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
2316:   }
2317:   PetscFree(work);
2318:   if (type == NORM_2) {
2319:     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2320:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2321:     for (i = 0; i < N; i++) reductions[i] /= m;
2322:   }
2323:   return 0;
2324: }

2326: PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2327: {
2328:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

2330:   MatInvertBlockDiagonal(a->A, values);
2331:   A->factorerrortype             = a->A->factorerrortype;
2332:   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2333:   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2334:   return 0;
2335: }

2337: PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2338: {
2339:   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2340:   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

2342:   if (!Y->preallocated) {
2343:     MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL);
2344:   } else if (!aij->nz) {
2345:     PetscInt nonew = aij->nonew;
2346:     MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL);
2347:     aij->nonew = nonew;
2348:   }
2349:   MatShift_Basic(Y, a);
2350:   return 0;
2351: }

2353: PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2354: {
2355:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

2358:   MatMissingDiagonal(a->A, missing, d);
2359:   if (d) {
2360:     PetscInt rstart;
2361:     MatGetOwnershipRange(A, &rstart, NULL);
2362:     *d += rstart / A->rmap->bs;
2363:   }
2364:   return 0;
2365: }

2367: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2368: {
2369:   *a = ((Mat_MPIBAIJ *)A->data)->A;
2370:   return 0;
2371: }

2373: /* -------------------------------------------------------------------*/
2374: static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2375:                                        MatGetRow_MPIBAIJ,
2376:                                        MatRestoreRow_MPIBAIJ,
2377:                                        MatMult_MPIBAIJ,
2378:                                        /* 4*/ MatMultAdd_MPIBAIJ,
2379:                                        MatMultTranspose_MPIBAIJ,
2380:                                        MatMultTransposeAdd_MPIBAIJ,
2381:                                        NULL,
2382:                                        NULL,
2383:                                        NULL,
2384:                                        /*10*/ NULL,
2385:                                        NULL,
2386:                                        NULL,
2387:                                        MatSOR_MPIBAIJ,
2388:                                        MatTranspose_MPIBAIJ,
2389:                                        /*15*/ MatGetInfo_MPIBAIJ,
2390:                                        MatEqual_MPIBAIJ,
2391:                                        MatGetDiagonal_MPIBAIJ,
2392:                                        MatDiagonalScale_MPIBAIJ,
2393:                                        MatNorm_MPIBAIJ,
2394:                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2395:                                        MatAssemblyEnd_MPIBAIJ,
2396:                                        MatSetOption_MPIBAIJ,
2397:                                        MatZeroEntries_MPIBAIJ,
2398:                                        /*24*/ MatZeroRows_MPIBAIJ,
2399:                                        NULL,
2400:                                        NULL,
2401:                                        NULL,
2402:                                        NULL,
2403:                                        /*29*/ MatSetUp_MPIBAIJ,
2404:                                        NULL,
2405:                                        NULL,
2406:                                        MatGetDiagonalBlock_MPIBAIJ,
2407:                                        NULL,
2408:                                        /*34*/ MatDuplicate_MPIBAIJ,
2409:                                        NULL,
2410:                                        NULL,
2411:                                        NULL,
2412:                                        NULL,
2413:                                        /*39*/ MatAXPY_MPIBAIJ,
2414:                                        MatCreateSubMatrices_MPIBAIJ,
2415:                                        MatIncreaseOverlap_MPIBAIJ,
2416:                                        MatGetValues_MPIBAIJ,
2417:                                        MatCopy_MPIBAIJ,
2418:                                        /*44*/ NULL,
2419:                                        MatScale_MPIBAIJ,
2420:                                        MatShift_MPIBAIJ,
2421:                                        NULL,
2422:                                        MatZeroRowsColumns_MPIBAIJ,
2423:                                        /*49*/ NULL,
2424:                                        NULL,
2425:                                        NULL,
2426:                                        NULL,
2427:                                        NULL,
2428:                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2429:                                        NULL,
2430:                                        MatSetUnfactored_MPIBAIJ,
2431:                                        MatPermute_MPIBAIJ,
2432:                                        MatSetValuesBlocked_MPIBAIJ,
2433:                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2434:                                        MatDestroy_MPIBAIJ,
2435:                                        MatView_MPIBAIJ,
2436:                                        NULL,
2437:                                        NULL,
2438:                                        /*64*/ NULL,
2439:                                        NULL,
2440:                                        NULL,
2441:                                        NULL,
2442:                                        NULL,
2443:                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2444:                                        NULL,
2445:                                        NULL,
2446:                                        NULL,
2447:                                        NULL,
2448:                                        /*74*/ NULL,
2449:                                        MatFDColoringApply_BAIJ,
2450:                                        NULL,
2451:                                        NULL,
2452:                                        NULL,
2453:                                        /*79*/ NULL,
2454:                                        NULL,
2455:                                        NULL,
2456:                                        NULL,
2457:                                        MatLoad_MPIBAIJ,
2458:                                        /*84*/ NULL,
2459:                                        NULL,
2460:                                        NULL,
2461:                                        NULL,
2462:                                        NULL,
2463:                                        /*89*/ NULL,
2464:                                        NULL,
2465:                                        NULL,
2466:                                        NULL,
2467:                                        NULL,
2468:                                        /*94*/ NULL,
2469:                                        NULL,
2470:                                        NULL,
2471:                                        NULL,
2472:                                        NULL,
2473:                                        /*99*/ NULL,
2474:                                        NULL,
2475:                                        NULL,
2476:                                        MatConjugate_MPIBAIJ,
2477:                                        NULL,
2478:                                        /*104*/ NULL,
2479:                                        MatRealPart_MPIBAIJ,
2480:                                        MatImaginaryPart_MPIBAIJ,
2481:                                        NULL,
2482:                                        NULL,
2483:                                        /*109*/ NULL,
2484:                                        NULL,
2485:                                        NULL,
2486:                                        NULL,
2487:                                        MatMissingDiagonal_MPIBAIJ,
2488:                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2489:                                        NULL,
2490:                                        MatGetGhosts_MPIBAIJ,
2491:                                        NULL,
2492:                                        NULL,
2493:                                        /*119*/ NULL,
2494:                                        NULL,
2495:                                        NULL,
2496:                                        NULL,
2497:                                        MatGetMultiProcBlock_MPIBAIJ,
2498:                                        /*124*/ NULL,
2499:                                        MatGetColumnReductions_MPIBAIJ,
2500:                                        MatInvertBlockDiagonal_MPIBAIJ,
2501:                                        NULL,
2502:                                        NULL,
2503:                                        /*129*/ NULL,
2504:                                        NULL,
2505:                                        NULL,
2506:                                        NULL,
2507:                                        NULL,
2508:                                        /*134*/ NULL,
2509:                                        NULL,
2510:                                        NULL,
2511:                                        NULL,
2512:                                        NULL,
2513:                                        /*139*/ MatSetBlockSizes_Default,
2514:                                        NULL,
2515:                                        NULL,
2516:                                        MatFDColoringSetUp_MPIXAIJ,
2517:                                        NULL,
2518:                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2519:                                        NULL,
2520:                                        NULL,
2521:                                        NULL,
2522:                                        NULL,
2523:                                        NULL,
2524:                                        /*150*/ NULL};

2526: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2527: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Preallocate and fill an MPIBAIJ matrix from block-CSR data (ii/jj in block
   units, local block-row numbering for ii, global block-column numbering for jj).
   V optionally supplies the bs*bs numerical blocks; when NULL, zeros are inserted
   so the nonzero pattern is still established exactly. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  /* establish block size and row/column layouts before any index arithmetic */
  PetscLayoutSetBlockSize(B->rmap, bs);
  PetscLayoutSetBlockSize(B->cmap, bs);
  PetscLayoutSetUp(B->rmap);
  PetscLayoutSetUp(B->cmap);
  PetscLayoutGetBlockSize(B->rmap, &bs);
  /* all quantities below are in units of bs x bs blocks */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  /* count, per local block row, how many blocks fall inside the diagonal
     column range [cstart,cend) versus the off-diagonal part, so the
     preallocation below is exact */
  PetscMalloc2(m, &d_nnz, m, &o_nnz);
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz);
  PetscFree2(d_nnz, o_nnz);

  /* if no values were supplied, insert zeros from a scratch buffer sized for
     the longest block row (nz_max blocks of bs*bs scalars) */
  values = (PetscScalar *)V;
  if (!values) PetscCalloc1(bs * bs * nz_max, &values);
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES);
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES);
      }
    }
  }

  if (!V) PetscFree(values);
  /* every entry inserted above is locally owned, so skip the off-process stash
     communication during this assembly; restore the user's flag afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
  B->nooffprocentries = nooffprocentries;

  /* the sparsity pattern is now exact; flag any future new nonzero location as an error */
  MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
  return 0;
}

2597: /*@C
2598:    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

2600:    Collective

2602:    Input Parameters:
2603: +  B - the matrix
2604: .  bs - the block size
2605: .  i - the indices into j for the start of each local row (starts with zero)
2606: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2607: -  v - optional values in the matrix

2609:    Level: advanced

2611:    Notes:
2612:     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2613:    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2614:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2615:    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2616:    block column and the second index is over columns within a block.

2618:    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

2620: .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2621: @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  /* dispatch to the type-specific implementation, if the matrix type composed one
     (a no-op for types without "MatMPIBAIJSetPreallocationCSR_C") */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  return 0;
}

/* Type-specific preallocation for MPIBAIJ: sets up the layouts, the cached
   block-scaled sizes, and (re)creates the sequential diagonal (A) and
   off-diagonal (B) sub-matrices with the requested block-row preallocation. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  MatSetBlockSize(B, PetscAbs(bs));
  PetscLayoutSetUp(B->rmap);
  PetscLayoutSetUp(B->cmap);
  /* the layout may adjust bs; re-read it before dividing sizes below */
  PetscLayoutGetBlockSize(B->rmap, &bs);

  /* NOTE(review): empty bodies below — the d_nnz/o_nnz validation code appears
     to have been elided in this listing; confirm against the upstream source */
  if (d_nnz) {
  }
  if (o_nnz) {
  }

  /* cache commonly used block-scaled sizes (mbs/nbs local, Mbs/Nbs global) */
  b      = (Mat_MPIBAIJ *)B->data;
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  /* block-row ownership ranges per process */
  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* discard any stale off-diagonal communication/mapping data */
#if defined(PETSC_USE_CTABLE)
  PetscTableDestroy(&b->colmap);
#else
  PetscFree(b->colmap);
#endif
  PetscFree(b->garray);
  VecDestroy(&b->lvec);
  VecScatterDestroy(&b->Mvctx);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);
  MatDestroy(&b->B);
  MatCreate(PETSC_COMM_SELF, &b->B);
  /* in the uniprocess case the off-diagonal part is empty (zero columns) */
  MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0);
  MatSetType(b->B, MATSEQBAIJ);

  if (!B->preallocated) {
    MatCreate(PETSC_COMM_SELF, &b->A);
    MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n);
    MatSetType(b->A, MATSEQBAIJ);
    MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash);
  }

  MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz);
  MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  return 0;
}

2693: extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2694: extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

/* Convert an MPIBAIJ matrix to MPIAdj (block-level adjacency graph): merges the
   diagonal and off-diagonal CSR structures into one globally-numbered, sorted
   row of block-column indices per block row, omitting the diagonal entry. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  /* row offsets: total block count from diagonal (d) plus off-diagonal (o) parts */
  PetscMalloc1(M + 1, &ii);
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from the count if this block row contains its diagonal block
       (MPIAdj does not store diagonal entries) */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscMalloc1(ii[M], &jj);
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal block columns that precede the owned column range; garray
       maps local off-diagonal block columns to global ones (relied on as sorted
       ascending by the break pattern here) */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns shifted to global numbering, skipping the diagonal itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal block columns after the owned column range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; do not free them here */
  MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj);
  return 0;
}

2733: #include <../src/mat/impls/aij/mpi/mpiaij.h>

2735: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

2737: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2738: {
2739:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2740:   Mat_MPIAIJ  *b;
2741:   Mat          B;


2745:   if (reuse == MAT_REUSE_MATRIX) {
2746:     B = *newmat;
2747:   } else {
2748:     MatCreate(PetscObjectComm((PetscObject)A), &B);
2749:     MatSetType(B, MATMPIAIJ);
2750:     MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
2751:     MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs);
2752:     MatSeqAIJSetPreallocation(B, 0, NULL);
2753:     MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL);
2754:   }
2755:   b = (Mat_MPIAIJ *)B->data;

2757:   if (reuse == MAT_REUSE_MATRIX) {
2758:     MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);
2759:     MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);
2760:   } else {
2761:     MatDestroy(&b->A);
2762:     MatDestroy(&b->B);
2763:     MatDisAssemble_MPIBAIJ(A);
2764:     MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);
2765:     MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);
2766:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
2767:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
2768:   }
2769:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2770:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

2772:   if (reuse == MAT_INPLACE_MATRIX) {
2773:     MatHeaderReplace(A, &B);
2774:   } else {
2775:     *newmat = B;
2776:   }
2777:   return 0;
2778: }

2780: /*MC
2781:    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

2783:    Options Database Keys:
2784: + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2785: . -mat_block_size <bs> - set the blocksize used to store the matrix
2786: . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2787: - -mat_use_hash_table <fact> - set hash table factor

2789:    Level: beginner

2791:    Note:
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2796: M*/

2798: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Constructor for the MPIBAIJ matrix type: allocates and zero-initializes the
   implementation struct, installs the function table, registers the composed
   type-specific methods, and processes the hash-table options. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscNew(&b);
  B->data = (void *)b;

  /* install the MPIBAIJ implementation of the Mat operations */
  PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank);
  MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size);

  /* build local table of row and column ownerships */
  PetscMalloc1(b->size + 1, &b->rangebs);

  /* build cache for off array entries formed */
  MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific methods reachable through PetscTryMethod/PetscUseMethod */
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj);
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ);
#if defined(PETSC_HAVE_HYPRE)
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE);
#endif
  PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS);
  PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ);

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg);
  if (flg) {
    PetscReal fact = 1.39;
    MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE);
    PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL);
    /* factors <= 1.0 are invalid for the hash table sizing; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    MatMPIBAIJSetHashTableFactor(B, fact);
    PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact);
  }
  PetscOptionsEnd();
  return 0;
}

2879: /*MC
2880:    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

2882:    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2883:    and `MATMPIBAIJ` otherwise.

2885:    Options Database Keys:
2886: . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

2888:   Level: beginner

2890: .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2891: M*/

2893: /*@C
2894:    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2895:    (block compressed row).  For good matrix assembly performance
2896:    the user should preallocate the matrix storage by setting the parameters
2897:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2898:    performance can be increased by more than a factor of 50.

2900:    Collective

2902:    Input Parameters:
2903: +  B - the matrix
2904: .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2905:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2906: .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2907:            submatrix  (same for all local rows)
2908: .  d_nnz - array containing the number of block nonzeros in the various block rows
2909:            of the in diagonal portion of the local (possibly different for each block
2910:            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2911:            set it even if it is zero.
2912: .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2913:            submatrix (same for all local rows).
2914: -  o_nnz - array containing the number of nonzeros in the various block rows of the
2915:            off-diagonal portion of the local submatrix (possibly different for
2916:            each block row) or NULL.

2918:    If the *_nnz parameter is given then the *_nz parameter is ignored

2920:    Options Database Keys:
2921: +   -mat_block_size - size of the blocks to use
2922: -   -mat_use_hash_table <fact> - set hash table factor

2924:    Notes:
2925:    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

2928:    Storage Information:
2929:    For a square global matrix we define each processor's diagonal portion
2930:    to be its local rows and the corresponding columns (a square submatrix);
2931:    each processor's off-diagonal portion encompasses the remainder of the
2932:    local matrix (a rectangular submatrix).

2934:    The user can specify preallocated storage for the diagonal part of
2935:    the local submatrix with either d_nz or d_nnz (not both).  Set
2936:    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2937:    memory allocation.  Likewise, specify preallocated storage for the
2938:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

2940:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2941:    the figure below we depict these three local rows and all columns (0-11).

2943: .vb
2944:            0 1 2 3 4 5 6 7 8 9 10 11
2945:           --------------------------
2946:    row 3  |o o o d d d o o o o  o  o
2947:    row 4  |o o o d d d o o o o  o  o
2948:    row 5  |o o o d d d o o o o  o  o
2949:           --------------------------
2950: .ve

2952:    Thus, any entries in the d locations are stored in the d (diagonal)
2953:    submatrix, and any entries in the o locations are stored in the
2954:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2955:    stored simply in the `MATSEQBAIJ` format for compressed row storage.

2957:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2958:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2959:    In general, for PDE problems in which most nonzeros are near the diagonal,
2960:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2961:    or you will get TERRIBLE performance; see the users' manual chapter on
2962:    matrices.

2964:    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2965:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2966:    You can also run with the option -info and look for messages with the string
2967:    malloc in them to see if additional memory allocation was needed.

2969:    Level: intermediate

2971: .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
2972: @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  /* dispatch to the type-specific implementation, if the matrix type composed one
     (a no-op for types without "MatMPIBAIJSetPreallocation_C") */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  return 0;
}

2982: /*@C
2983:    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
2984:    (block compressed row).  For good matrix assembly performance
2985:    the user should preallocate the matrix storage by setting the parameters
2986:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2987:    performance can be increased by more than a factor of 50.

2989:    Collective

2991:    Input Parameters:
2992: +  comm - MPI communicator
2993: .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2994:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2995: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2996:            This value should be the same as the local size used in creating the
2997:            y vector for the matrix-vector product y = Ax.
2998: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2999:            This value should be the same as the local size used in creating the
3000:            x vector for the matrix-vector product y = Ax.
3001: .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3002: .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3003: .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3004:            submatrix  (same for all local rows)
3005: .  d_nnz - array containing the number of nonzero blocks in the various block rows
3006:            of the in diagonal portion of the local (possibly different for each block
3007:            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3008:            and set it even if it is zero.
3009: .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3010:            submatrix (same for all local rows).
3011: -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3012:            off-diagonal portion of the local submatrix (possibly different for
3013:            each block row) or NULL.

3015:    Output Parameter:
3016: .  A - the matrix

3018:    Options Database Keys:
3019: +   -mat_block_size - size of the blocks to use
3020: -   -mat_use_hash_table <fact> - set hash table factor

3022:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3023:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3024:    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

3026:    Notes:
3027:    If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

3031:    The user MUST specify either the local or global matrix dimensions
3032:    (possibly both).

3034:    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

3037:    Storage Information:
3038:    For a square global matrix we define each processor's diagonal portion
3039:    to be its local rows and the corresponding columns (a square submatrix);
3040:    each processor's off-diagonal portion encompasses the remainder of the
3041:    local matrix (a rectangular submatrix).

3043:    The user can specify preallocated storage for the diagonal part of
3044:    the local submatrix with either d_nz or d_nnz (not both).  Set
3045:    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3046:    memory allocation.  Likewise, specify preallocated storage for the
3047:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

3049:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3050:    the figure below we depict these three local rows and all columns (0-11).

3052: .vb
3053:            0 1 2 3 4 5 6 7 8 9 10 11
3054:           --------------------------
3055:    row 3  |o o o d d d o o o o  o  o
3056:    row 4  |o o o d d d o o o o  o  o
3057:    row 5  |o o o d d d o o o o  o  o
3058:           --------------------------
3059: .ve

3061:    Thus, any entries in the d locations are stored in the d (diagonal)
3062:    submatrix, and any entries in the o locations are stored in the
3063:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3064:    stored simply in the `MATSEQBAIJ` format for compressed row storage.

3066:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3067:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3068:    In general, for PDE problems in which most nonzeros are near the diagonal,
3069:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3070:    or you will get TERRIBLE performance; see the users' manual chapter on
3071:    matrices.

3073:    Level: intermediate

3075: .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3076: @*/
3077: PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3078: {
3079:   PetscMPIInt size;

3081:   MatCreate(comm, A);
3082:   MatSetSizes(*A, m, n, M, N);
3083:   MPI_Comm_size(comm, &size);
3084:   if (size > 1) {
3085:     MatSetType(*A, MATMPIBAIJ);
3086:     MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz);
3087:   } else {
3088:     MatSetType(*A, MATSEQBAIJ);
3089:     MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz);
3090:   }
3091:   return 0;
3092: }

/* Duplicate an MPIBAIJ matrix: copies the layout, implementation metadata,
   column map, ghost array, and (per cpvalues) the sequential A/B sub-matrices.
   NOTE(review): marks the duplicate assembled/preallocated up front, which
   presumes matin is assembled — confirm callers guarantee this. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  *newmat = NULL;
  MatCreate(PetscObjectComm((PetscObject)matin), &mat);
  MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N);
  MatSetType(mat, ((PetscObject)matin)->type_name);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy cached block-scaled sizes */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the layouts by reference rather than rebuilding them */
  PetscLayoutReference(matin->rmap, &mat->rmap);
  PetscLayoutReference(matin->cmap, &mat->cmap);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow()/assembly scratch state starts empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: copy the configuration but not the table or the counters */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1);
  /* deep-copy the global-to-local column map, if one has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscTableCreateCopy(oldmat->colmap, &a->colmap);
#else
    PetscMalloc1(a->Nbs, &a->colmap);
    PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs);
#endif
  } else a->colmap = NULL;

  /* deep-copy the ghost (off-diagonal global block column) array, if non-empty */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscMalloc1(len, &a->garray);
    PetscArraycpy(a->garray, oldmat->garray, len);
  } else a->garray = NULL;

  MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash);
  VecDuplicate(oldmat->lvec, &a->lvec);
  VecScatterCopy(oldmat->Mvctx, &a->Mvctx);

  /* duplicate the sequential parts; cpvalues controls whether values are copied */
  MatDuplicate(oldmat->A, cpvalues, &a->A);
  MatDuplicate(oldmat->B, cpvalues, &a->B);
  PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist);
  *newmat = mat;
  return 0;
}

3169: /* Used for both MPIBAIJ and MPISBAIJ matrices */
3170: PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3171: {
3172:   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3173:   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3174:   PetscScalar *matvals;

3176:   PetscViewerSetUp(viewer);

3178:   /* read in matrix header */
3179:   PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
3181:   M  = header[1];
3182:   N  = header[2];
3183:   nz = header[3];

3188:   /* set block sizes from the viewer's .info file */
3189:   MatLoad_Binary_BlockSizes(mat, viewer);
3190:   /* set local sizes if not set already */
3191:   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3192:   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3193:   /* set global sizes if not set already */
3194:   if (mat->rmap->N < 0) mat->rmap->N = M;
3195:   if (mat->cmap->N < 0) mat->cmap->N = N;
3196:   PetscLayoutSetUp(mat->rmap);
3197:   PetscLayoutSetUp(mat->cmap);

3199:   /* check if the matrix sizes are correct */
3200:   MatGetSize(mat, &rows, &cols);
3202:   MatGetBlockSize(mat, &bs);
3203:   MatGetLocalSize(mat, &m, &n);
3204:   PetscLayoutGetRange(mat->rmap, &rs, NULL);
3205:   PetscLayoutGetRange(mat->cmap, &cs, &ce);
3206:   mbs = m / bs;
3207:   nbs = n / bs;

3209:   /* read in row lengths and build row indices */
3210:   PetscMalloc1(m + 1, &rowidxs);
3211:   PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT);
3212:   rowidxs[0] = 0;
3213:   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3214:   MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer));

3217:   /* read in column indices and matrix values */
3218:   PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals);
3219:   PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
3220:   PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);

3222:   {                /* preallocate matrix storage */
3223:     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3224:     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3225:     PetscBool  sbaij, done;
3226:     PetscInt  *d_nnz, *o_nnz;

3228:     PetscBTCreate(nbs, &bt);
3229:     PetscHSetICreate(&ht);
3230:     PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz);
3231:     PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij);
3232:     for (i = 0; i < mbs; i++) {
3233:       PetscBTMemzero(nbs, bt);
3234:       PetscHSetIClear(ht);
3235:       for (k = 0; k < bs; k++) {
3236:         PetscInt row = bs * i + k;
3237:         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3238:           PetscInt col = colidxs[j];
3239:           if (!sbaij || col >= row) {
3240:             if (col >= cs && col < ce) {
3241:               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3242:             } else {
3243:               PetscHSetIQueryAdd(ht, col / bs, &done);
3244:               if (done) o_nnz[i]++;
3245:             }
3246:           }
3247:         }
3248:       }
3249:     }
3250:     PetscBTDestroy(&bt);
3251:     PetscHSetIDestroy(&ht);
3252:     MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz);
3253:     MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz);
3254:     PetscFree2(d_nnz, o_nnz);
3255:   }

3257:   /* store matrix values */
3258:   for (i = 0; i < m; i++) {
3259:     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3260:     (*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3261:   }

3263:   PetscFree(rowidxs);
3264:   PetscFree2(colidxs, matvals);
3265:   MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
3266:   MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
3267:   return 0;
3268: }

3270: PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3271: {
3272:   PetscBool isbinary;

3274:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
3276:   MatLoad_MPIBAIJ_Binary(mat, viewer);
3277:   return 0;
3278: }

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

3283:    Input Parameters:
3284: +  mat  - the matrix
3285: -  fact - factor

3287:    Options Database Key:
3288: .  -mat_use_hash_table <fact> - provide the factor

3290:    Level: advanced

3292: .seealso: `MATMPIBAIJ`, `MatSetOption()`
3293: @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  /* Dispatch to the type-specific implementation registered under
     "MatSetHashTableFactor_C" (MatSetHashTableFactor_MPIBAIJ for MPIBAIJ);
     PetscTryMethod() is a no-op for matrix types that do not provide it. */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  return 0;
}

3300: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3301: {
3302:   Mat_MPIBAIJ *baij;

3304:   baij          = (Mat_MPIBAIJ *)mat->data;
3305:   baij->ht_fact = fact;
3306:   return 0;
3307: }

3309: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3310: {
3311:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3312:   PetscBool    flg;

3314:   PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg);
3316:   if (Ad) *Ad = a->A;
3317:   if (Ao) *Ao = a->B;
3318:   if (colmap) *colmap = a->garray;
3319:   return 0;
3320: }

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
3325: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3326:   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3327: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3328:   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3329: #endif

3331: /*@C
3332:   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

3334:   Collective

3336:   Input Parameters:
3337: + mat - the matrix
3338: . min - number of input rows
3339: . im - input rows
3340: . nin - number of input columns
3341: . in - input columns
3342: . v - numerical values input
3343: - addvin - `INSERT_VALUES` or `ADD_VALUES`

3345:   Developer Note:
3346:     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

3348:   Level: advanced

3350: .seealso: `MatSetValuesBlocked()`
3351: @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;      /* scratch bs x bs block buffer, lazily allocated below */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  /* re-opening an assembled matrix for new values */
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0);

  /* lazily allocate the bs2-sized scratch block and cache it on the matrix */
  if (!barray) {
    PetscMalloc1(bs2, &barray);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive block rows/columns of v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip (standard PETSc convention) */
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this rank */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required: v already holds the
           block contiguously, so alias barray into v instead of copying.
           NOTE: this repoints barray away from baij->barray for the rest
           of the iteration; the copy branch below resets it via barray -= bs2. */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the strided bs x bs block into barray */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal portion of the parallel matrix */
          col = in[j] - cstart;
          MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column index: skip */
        } else {
          /* off-diagonal portion: translate global block column to local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              /* debug-only lookup; NOTE(review): the validity check on 'data'
                 appears to have been stripped from this rendering */
              PetscTableFind(baij->colmap, in[j] + 1, &data);
            }
  #else
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscTableFind(baij->colmap, in[j] + 1, &col);
            col = (col - 1) / bs; /* colmap stores 1-based scalar offsets; convert to 0-based block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not present in the off-diagonal structure: disassemble so
               new nonzero locations can be inserted (unless nonew forbids it) */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              MatDisAssemble_MPIBAIJ(mat);
              col = in[j];
            }
          } else col = in[j];
          MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]);
        }
      }
    } else {
      /* block row owned by another rank: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
        } else {
          MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0);
  return 0;
}

3459: /*@
3460:      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3461:          CSR format the local rows.

3463:    Collective

3465:    Input Parameters:
3466: +  comm - MPI communicator
3467: .  bs - the block size, only a block size of 1 is supported
3468: .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3469: .  n - This value should be the same as the local size used in creating the
3470:        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3471:        calculated if N is given) For square matrices n is almost always m.
3472: .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3473: .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3474: .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3475: .   j - column indices
3476: -   a - matrix values

3478:    Output Parameter:
3479: .   mat - the matrix

3481:    Level: intermediate

3483:    Notes:
3484:        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3485:      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3486:      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

3488:      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3489:      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3490:      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3491:      with column-major ordering within blocks.

3493:        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

3495: .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3496:           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3497: @*/
3498: PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3499: {
3502:   MatCreate(comm, mat);
3503:   MatSetSizes(*mat, m, n, M, N);
3504:   MatSetType(*mat, MATMPIBAIJ);
3505:   MatSetBlockSize(*mat, bs);
3506:   MatSetUp(*mat);
3507:   MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE);
3508:   MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a);
3509:   MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE);
3510:   return 0;
3511: }

3513: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3514: {
3515:   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3516:   PetscInt    *indx;
3517:   PetscScalar *values;

3519:   MatGetSize(inmat, &m, &N);
3520:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3521:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3522:     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3523:     PetscInt    *bindx, rmax = a->rmax, j;
3524:     PetscMPIInt  rank, size;

3526:     MatGetBlockSizes(inmat, &bs, &cbs);
3527:     mbs = m / bs;
3528:     Nbs = N / cbs;
3529:     if (n == PETSC_DECIDE) PetscSplitOwnershipBlock(comm, cbs, &n, &N);
3530:     nbs = n / cbs;

3532:     PetscMalloc1(rmax, &bindx);
3533:     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

3535:     MPI_Comm_rank(comm, &rank);
3536:     MPI_Comm_rank(comm, &size);
3537:     if (rank == size - 1) {
3538:       /* Check sum(nbs) = Nbs */
3540:     }

3542:     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3543:     for (i = 0; i < mbs; i++) {
3544:       MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL); /* non-blocked nnz and indx */
3545:       nnz = nnz / bs;
3546:       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3547:       MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz);
3548:       MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL);
3549:     }
3550:     PetscFree(bindx);

3552:     MatCreate(comm, outmat);
3553:     MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
3554:     MatSetBlockSizes(*outmat, bs, cbs);
3555:     MatSetType(*outmat, MATBAIJ);
3556:     MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz);
3557:     MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz);
3558:     MatPreallocateEnd(dnz, onz);
3559:     MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
3560:   }

3562:   /* numeric phase */
3563:   MatGetBlockSizes(inmat, &bs, &cbs);
3564:   MatGetOwnershipRange(*outmat, &rstart, NULL);

3566:   for (i = 0; i < m; i++) {
3567:     MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values);
3568:     Ii = i + rstart;
3569:     MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES);
3570:     MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values);
3571:   }
3572:   MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY);
3573:   MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY);
3574:   return 0;
3575: }