Actual source code: baij.c

  1: /*
  2:     Defines the basic matrix operations for the BAIJ (compressed row)
  3:   matrix storage format.
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/kernels/blockinvert.h>
  8: #include <petsc/private/kernels/blockmatmult.h>

 10: /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
 11: #define TYPE BAIJ
 12: #define TYPE_BS
 13: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
 14: #undef TYPE_BS
 15: #define TYPE_BS _BS
 16: #define TYPE_BS_ON
 17: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
 18: #undef TYPE_BS
 19: #include "../src/mat/impls/aij/seq/seqhashmat.h"
 20: #undef TYPE
 21: #undef TYPE_BS_ON

 23: #if defined(PETSC_HAVE_HYPRE)
 24: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
 25: #endif

 27: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 28: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
 29: #endif
 30: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

 32: MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)

 34: static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
 35: {
 36:   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
 37:   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
 38:   MatScalar   *a_val = a_aij->a;

 40:   PetscFunctionBegin;
 41:   PetscCall(MatGetSize(A, &m, &n));
 42:   PetscCall(PetscArrayzero(reductions, n));
 43:   if (type == NORM_2) {
 44:     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
 45:       for (jb = 0; jb < bs; jb++) {
 46:         for (ib = 0; ib < bs; ib++) {
 47:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
 48:           a_val++;
 49:         }
 50:       }
 51:     }
 52:   } else if (type == NORM_1) {
 53:     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
 54:       for (jb = 0; jb < bs; jb++) {
 55:         for (ib = 0; ib < bs; ib++) {
 56:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
 57:           a_val++;
 58:         }
 59:       }
 60:     }
 61:   } else if (type == NORM_INFINITY) {
 62:     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
 63:       for (jb = 0; jb < bs; jb++) {
 64:         for (ib = 0; ib < bs; ib++) {
 65:           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
 66:           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
 67:           a_val++;
 68:         }
 69:       }
 70:     }
 71:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
 72:     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
 73:       for (jb = 0; jb < bs; jb++) {
 74:         for (ib = 0; ib < bs; ib++) {
 75:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
 76:           a_val++;
 77:         }
 78:       }
 79:     }
 80:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
 81:     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
 82:       for (jb = 0; jb < bs; jb++) {
 83:         for (ib = 0; ib < bs; ib++) {
 84:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
 85:           a_val++;
 86:         }
 87:       }
 88:     }
 89:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
 90:   if (type == NORM_2) {
 91:     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
 92:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
 93:     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
 94:   }
 95:   PetscFunctionReturn(PETSC_SUCCESS);
 96: }

 98: static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
 99: {
100:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data;
101:   PetscInt        i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
102:   MatScalar      *v     = a->a, *odiag, *diag, work[25], *v_work;
103:   PetscReal       shift = 0.0;
104:   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
105:   const PetscInt *adiag;

107:   PetscFunctionBegin;
108:   allowzeropivot = PetscNot(A->erroriffailure);

110:   if (a->idiagvalid) {
111:     if (values) *values = a->idiag;
112:     PetscFunctionReturn(PETSC_SUCCESS);
113:   }
114:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
115:   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116:   diag = a->idiag;
117:   if (values) *values = a->idiag;
118:   /* factor and invert each block */
119:   switch (bs) {
120:   case 1:
121:     for (i = 0; i < mbs; i++) {
122:       odiag   = v + 1 * adiag[i];
123:       diag[0] = odiag[0];

125:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126:         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
127:         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128:         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
129:         A->factorerror_zeropivot_row   = i;
130:         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131:       }

133:       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134:       diag += 1;
135:     }
136:     break;
137:   case 2:
138:     for (i = 0; i < mbs; i++) {
139:       odiag   = v + 4 * adiag[i];
140:       diag[0] = odiag[0];
141:       diag[1] = odiag[1];
142:       diag[2] = odiag[2];
143:       diag[3] = odiag[3];
144:       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146:       diag += 4;
147:     }
148:     break;
149:   case 3:
150:     for (i = 0; i < mbs; i++) {
151:       odiag   = v + 9 * adiag[i];
152:       diag[0] = odiag[0];
153:       diag[1] = odiag[1];
154:       diag[2] = odiag[2];
155:       diag[3] = odiag[3];
156:       diag[4] = odiag[4];
157:       diag[5] = odiag[5];
158:       diag[6] = odiag[6];
159:       diag[7] = odiag[7];
160:       diag[8] = odiag[8];
161:       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163:       diag += 9;
164:     }
165:     break;
166:   case 4:
167:     for (i = 0; i < mbs; i++) {
168:       odiag = v + 16 * adiag[i];
169:       PetscCall(PetscArraycpy(diag, odiag, 16));
170:       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172:       diag += 16;
173:     }
174:     break;
175:   case 5:
176:     for (i = 0; i < mbs; i++) {
177:       odiag = v + 25 * adiag[i];
178:       PetscCall(PetscArraycpy(diag, odiag, 25));
179:       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181:       diag += 25;
182:     }
183:     break;
184:   case 6:
185:     for (i = 0; i < mbs; i++) {
186:       odiag = v + 36 * adiag[i];
187:       PetscCall(PetscArraycpy(diag, odiag, 36));
188:       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190:       diag += 36;
191:     }
192:     break;
193:   case 7:
194:     for (i = 0; i < mbs; i++) {
195:       odiag = v + 49 * adiag[i];
196:       PetscCall(PetscArraycpy(diag, odiag, 49));
197:       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199:       diag += 49;
200:     }
201:     break;
202:   default:
203:     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204:     for (i = 0; i < mbs; i++) {
205:       odiag = v + bs2 * adiag[i];
206:       PetscCall(PetscArraycpy(diag, odiag, bs2));
207:       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209:       diag += bs2;
210:     }
211:     PetscCall(PetscFree2(v_work, v_pivots));
212:   }
213:   a->idiagvalid = PETSC_TRUE;
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }

217: static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218: {
219:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220:   PetscScalar       *x, *work, *w, *workt, *t;
221:   const MatScalar   *v, *aa = a->a, *idiag;
222:   const PetscScalar *b, *xb;
223:   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224:   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225:   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;

227:   PetscFunctionBegin;
228:   its = its * lits;
229:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232:   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233:   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");

235:   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));

237:   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238:   diag  = a->diag;
239:   idiag = a->idiag;
240:   k     = PetscMax(A->rmap->n, A->cmap->n);
241:   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242:   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243:   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244:   work = a->mult_work;
245:   t    = a->sor_workt;
246:   w    = a->sor_work;

248:   PetscCall(VecGetArray(xx, &x));
249:   PetscCall(VecGetArrayRead(bb, &b));

251:   if (flag & SOR_ZERO_INITIAL_GUESS) {
252:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253:       switch (bs) {
254:       case 1:
255:         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256:         t[0] = b[0];
257:         i2   = 1;
258:         idiag += 1;
259:         for (i = 1; i < m; i++) {
260:           v    = aa + ai[i];
261:           vi   = aj + ai[i];
262:           nz   = diag[i] - ai[i];
263:           s[0] = b[i2];
264:           for (j = 0; j < nz; j++) {
265:             xw[0] = x[vi[j]];
266:             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267:           }
268:           t[i2] = s[0];
269:           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270:           x[i2] = xw[0];
271:           idiag += 1;
272:           i2 += 1;
273:         }
274:         break;
275:       case 2:
276:         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277:         t[0] = b[0];
278:         t[1] = b[1];
279:         i2   = 2;
280:         idiag += 4;
281:         for (i = 1; i < m; i++) {
282:           v    = aa + 4 * ai[i];
283:           vi   = aj + ai[i];
284:           nz   = diag[i] - ai[i];
285:           s[0] = b[i2];
286:           s[1] = b[i2 + 1];
287:           for (j = 0; j < nz; j++) {
288:             idx   = 2 * vi[j];
289:             it    = 4 * j;
290:             xw[0] = x[idx];
291:             xw[1] = x[1 + idx];
292:             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293:           }
294:           t[i2]     = s[0];
295:           t[i2 + 1] = s[1];
296:           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297:           x[i2]     = xw[0];
298:           x[i2 + 1] = xw[1];
299:           idiag += 4;
300:           i2 += 2;
301:         }
302:         break;
303:       case 3:
304:         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305:         t[0] = b[0];
306:         t[1] = b[1];
307:         t[2] = b[2];
308:         i2   = 3;
309:         idiag += 9;
310:         for (i = 1; i < m; i++) {
311:           v    = aa + 9 * ai[i];
312:           vi   = aj + ai[i];
313:           nz   = diag[i] - ai[i];
314:           s[0] = b[i2];
315:           s[1] = b[i2 + 1];
316:           s[2] = b[i2 + 2];
317:           while (nz--) {
318:             idx   = 3 * (*vi++);
319:             xw[0] = x[idx];
320:             xw[1] = x[1 + idx];
321:             xw[2] = x[2 + idx];
322:             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323:             v += 9;
324:           }
325:           t[i2]     = s[0];
326:           t[i2 + 1] = s[1];
327:           t[i2 + 2] = s[2];
328:           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329:           x[i2]     = xw[0];
330:           x[i2 + 1] = xw[1];
331:           x[i2 + 2] = xw[2];
332:           idiag += 9;
333:           i2 += 3;
334:         }
335:         break;
336:       case 4:
337:         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338:         t[0] = b[0];
339:         t[1] = b[1];
340:         t[2] = b[2];
341:         t[3] = b[3];
342:         i2   = 4;
343:         idiag += 16;
344:         for (i = 1; i < m; i++) {
345:           v    = aa + 16 * ai[i];
346:           vi   = aj + ai[i];
347:           nz   = diag[i] - ai[i];
348:           s[0] = b[i2];
349:           s[1] = b[i2 + 1];
350:           s[2] = b[i2 + 2];
351:           s[3] = b[i2 + 3];
352:           while (nz--) {
353:             idx   = 4 * (*vi++);
354:             xw[0] = x[idx];
355:             xw[1] = x[1 + idx];
356:             xw[2] = x[2 + idx];
357:             xw[3] = x[3 + idx];
358:             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359:             v += 16;
360:           }
361:           t[i2]     = s[0];
362:           t[i2 + 1] = s[1];
363:           t[i2 + 2] = s[2];
364:           t[i2 + 3] = s[3];
365:           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366:           x[i2]     = xw[0];
367:           x[i2 + 1] = xw[1];
368:           x[i2 + 2] = xw[2];
369:           x[i2 + 3] = xw[3];
370:           idiag += 16;
371:           i2 += 4;
372:         }
373:         break;
374:       case 5:
375:         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376:         t[0] = b[0];
377:         t[1] = b[1];
378:         t[2] = b[2];
379:         t[3] = b[3];
380:         t[4] = b[4];
381:         i2   = 5;
382:         idiag += 25;
383:         for (i = 1; i < m; i++) {
384:           v    = aa + 25 * ai[i];
385:           vi   = aj + ai[i];
386:           nz   = diag[i] - ai[i];
387:           s[0] = b[i2];
388:           s[1] = b[i2 + 1];
389:           s[2] = b[i2 + 2];
390:           s[3] = b[i2 + 3];
391:           s[4] = b[i2 + 4];
392:           while (nz--) {
393:             idx   = 5 * (*vi++);
394:             xw[0] = x[idx];
395:             xw[1] = x[1 + idx];
396:             xw[2] = x[2 + idx];
397:             xw[3] = x[3 + idx];
398:             xw[4] = x[4 + idx];
399:             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400:             v += 25;
401:           }
402:           t[i2]     = s[0];
403:           t[i2 + 1] = s[1];
404:           t[i2 + 2] = s[2];
405:           t[i2 + 3] = s[3];
406:           t[i2 + 4] = s[4];
407:           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408:           x[i2]     = xw[0];
409:           x[i2 + 1] = xw[1];
410:           x[i2 + 2] = xw[2];
411:           x[i2 + 3] = xw[3];
412:           x[i2 + 4] = xw[4];
413:           idiag += 25;
414:           i2 += 5;
415:         }
416:         break;
417:       case 6:
418:         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419:         t[0] = b[0];
420:         t[1] = b[1];
421:         t[2] = b[2];
422:         t[3] = b[3];
423:         t[4] = b[4];
424:         t[5] = b[5];
425:         i2   = 6;
426:         idiag += 36;
427:         for (i = 1; i < m; i++) {
428:           v    = aa + 36 * ai[i];
429:           vi   = aj + ai[i];
430:           nz   = diag[i] - ai[i];
431:           s[0] = b[i2];
432:           s[1] = b[i2 + 1];
433:           s[2] = b[i2 + 2];
434:           s[3] = b[i2 + 3];
435:           s[4] = b[i2 + 4];
436:           s[5] = b[i2 + 5];
437:           while (nz--) {
438:             idx   = 6 * (*vi++);
439:             xw[0] = x[idx];
440:             xw[1] = x[1 + idx];
441:             xw[2] = x[2 + idx];
442:             xw[3] = x[3 + idx];
443:             xw[4] = x[4 + idx];
444:             xw[5] = x[5 + idx];
445:             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446:             v += 36;
447:           }
448:           t[i2]     = s[0];
449:           t[i2 + 1] = s[1];
450:           t[i2 + 2] = s[2];
451:           t[i2 + 3] = s[3];
452:           t[i2 + 4] = s[4];
453:           t[i2 + 5] = s[5];
454:           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455:           x[i2]     = xw[0];
456:           x[i2 + 1] = xw[1];
457:           x[i2 + 2] = xw[2];
458:           x[i2 + 3] = xw[3];
459:           x[i2 + 4] = xw[4];
460:           x[i2 + 5] = xw[5];
461:           idiag += 36;
462:           i2 += 6;
463:         }
464:         break;
465:       case 7:
466:         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467:         t[0] = b[0];
468:         t[1] = b[1];
469:         t[2] = b[2];
470:         t[3] = b[3];
471:         t[4] = b[4];
472:         t[5] = b[5];
473:         t[6] = b[6];
474:         i2   = 7;
475:         idiag += 49;
476:         for (i = 1; i < m; i++) {
477:           v    = aa + 49 * ai[i];
478:           vi   = aj + ai[i];
479:           nz   = diag[i] - ai[i];
480:           s[0] = b[i2];
481:           s[1] = b[i2 + 1];
482:           s[2] = b[i2 + 2];
483:           s[3] = b[i2 + 3];
484:           s[4] = b[i2 + 4];
485:           s[5] = b[i2 + 5];
486:           s[6] = b[i2 + 6];
487:           while (nz--) {
488:             idx   = 7 * (*vi++);
489:             xw[0] = x[idx];
490:             xw[1] = x[1 + idx];
491:             xw[2] = x[2 + idx];
492:             xw[3] = x[3 + idx];
493:             xw[4] = x[4 + idx];
494:             xw[5] = x[5 + idx];
495:             xw[6] = x[6 + idx];
496:             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497:             v += 49;
498:           }
499:           t[i2]     = s[0];
500:           t[i2 + 1] = s[1];
501:           t[i2 + 2] = s[2];
502:           t[i2 + 3] = s[3];
503:           t[i2 + 4] = s[4];
504:           t[i2 + 5] = s[5];
505:           t[i2 + 6] = s[6];
506:           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507:           x[i2]     = xw[0];
508:           x[i2 + 1] = xw[1];
509:           x[i2 + 2] = xw[2];
510:           x[i2 + 3] = xw[3];
511:           x[i2 + 4] = xw[4];
512:           x[i2 + 5] = xw[5];
513:           x[i2 + 6] = xw[6];
514:           idiag += 49;
515:           i2 += 7;
516:         }
517:         break;
518:       default:
519:         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520:         PetscCall(PetscArraycpy(t, b, bs));
521:         i2 = bs;
522:         idiag += bs2;
523:         for (i = 1; i < m; i++) {
524:           v  = aa + bs2 * ai[i];
525:           vi = aj + ai[i];
526:           nz = diag[i] - ai[i];

528:           PetscCall(PetscArraycpy(w, b + i2, bs));
529:           /* copy all rows of x that are needed into contiguous space */
530:           workt = work;
531:           for (j = 0; j < nz; j++) {
532:             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533:             workt += bs;
534:           }
535:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536:           PetscCall(PetscArraycpy(t + i2, w, bs));
537:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);

539:           idiag += bs2;
540:           i2 += bs;
541:         }
542:         break;
543:       }
544:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545:       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546:       xb = t;
547:     } else xb = b;
548:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549:       idiag = a->idiag + bs2 * (a->mbs - 1);
550:       i2    = bs * (m - 1);
551:       switch (bs) {
552:       case 1:
553:         s[0] = xb[i2];
554:         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555:         x[i2] = xw[0];
556:         i2 -= 1;
557:         for (i = m - 2; i >= 0; i--) {
558:           v    = aa + (diag[i] + 1);
559:           vi   = aj + diag[i] + 1;
560:           nz   = ai[i + 1] - diag[i] - 1;
561:           s[0] = xb[i2];
562:           for (j = 0; j < nz; j++) {
563:             xw[0] = x[vi[j]];
564:             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565:           }
566:           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567:           x[i2] = xw[0];
568:           idiag -= 1;
569:           i2 -= 1;
570:         }
571:         break;
572:       case 2:
573:         s[0] = xb[i2];
574:         s[1] = xb[i2 + 1];
575:         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576:         x[i2]     = xw[0];
577:         x[i2 + 1] = xw[1];
578:         i2 -= 2;
579:         idiag -= 4;
580:         for (i = m - 2; i >= 0; i--) {
581:           v    = aa + 4 * (diag[i] + 1);
582:           vi   = aj + diag[i] + 1;
583:           nz   = ai[i + 1] - diag[i] - 1;
584:           s[0] = xb[i2];
585:           s[1] = xb[i2 + 1];
586:           for (j = 0; j < nz; j++) {
587:             idx   = 2 * vi[j];
588:             it    = 4 * j;
589:             xw[0] = x[idx];
590:             xw[1] = x[1 + idx];
591:             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592:           }
593:           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594:           x[i2]     = xw[0];
595:           x[i2 + 1] = xw[1];
596:           idiag -= 4;
597:           i2 -= 2;
598:         }
599:         break;
600:       case 3:
601:         s[0] = xb[i2];
602:         s[1] = xb[i2 + 1];
603:         s[2] = xb[i2 + 2];
604:         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605:         x[i2]     = xw[0];
606:         x[i2 + 1] = xw[1];
607:         x[i2 + 2] = xw[2];
608:         i2 -= 3;
609:         idiag -= 9;
610:         for (i = m - 2; i >= 0; i--) {
611:           v    = aa + 9 * (diag[i] + 1);
612:           vi   = aj + diag[i] + 1;
613:           nz   = ai[i + 1] - diag[i] - 1;
614:           s[0] = xb[i2];
615:           s[1] = xb[i2 + 1];
616:           s[2] = xb[i2 + 2];
617:           while (nz--) {
618:             idx   = 3 * (*vi++);
619:             xw[0] = x[idx];
620:             xw[1] = x[1 + idx];
621:             xw[2] = x[2 + idx];
622:             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623:             v += 9;
624:           }
625:           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626:           x[i2]     = xw[0];
627:           x[i2 + 1] = xw[1];
628:           x[i2 + 2] = xw[2];
629:           idiag -= 9;
630:           i2 -= 3;
631:         }
632:         break;
633:       case 4:
634:         s[0] = xb[i2];
635:         s[1] = xb[i2 + 1];
636:         s[2] = xb[i2 + 2];
637:         s[3] = xb[i2 + 3];
638:         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639:         x[i2]     = xw[0];
640:         x[i2 + 1] = xw[1];
641:         x[i2 + 2] = xw[2];
642:         x[i2 + 3] = xw[3];
643:         i2 -= 4;
644:         idiag -= 16;
645:         for (i = m - 2; i >= 0; i--) {
646:           v    = aa + 16 * (diag[i] + 1);
647:           vi   = aj + diag[i] + 1;
648:           nz   = ai[i + 1] - diag[i] - 1;
649:           s[0] = xb[i2];
650:           s[1] = xb[i2 + 1];
651:           s[2] = xb[i2 + 2];
652:           s[3] = xb[i2 + 3];
653:           while (nz--) {
654:             idx   = 4 * (*vi++);
655:             xw[0] = x[idx];
656:             xw[1] = x[1 + idx];
657:             xw[2] = x[2 + idx];
658:             xw[3] = x[3 + idx];
659:             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660:             v += 16;
661:           }
662:           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663:           x[i2]     = xw[0];
664:           x[i2 + 1] = xw[1];
665:           x[i2 + 2] = xw[2];
666:           x[i2 + 3] = xw[3];
667:           idiag -= 16;
668:           i2 -= 4;
669:         }
670:         break;
671:       case 5:
672:         s[0] = xb[i2];
673:         s[1] = xb[i2 + 1];
674:         s[2] = xb[i2 + 2];
675:         s[3] = xb[i2 + 3];
676:         s[4] = xb[i2 + 4];
677:         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678:         x[i2]     = xw[0];
679:         x[i2 + 1] = xw[1];
680:         x[i2 + 2] = xw[2];
681:         x[i2 + 3] = xw[3];
682:         x[i2 + 4] = xw[4];
683:         i2 -= 5;
684:         idiag -= 25;
685:         for (i = m - 2; i >= 0; i--) {
686:           v    = aa + 25 * (diag[i] + 1);
687:           vi   = aj + diag[i] + 1;
688:           nz   = ai[i + 1] - diag[i] - 1;
689:           s[0] = xb[i2];
690:           s[1] = xb[i2 + 1];
691:           s[2] = xb[i2 + 2];
692:           s[3] = xb[i2 + 3];
693:           s[4] = xb[i2 + 4];
694:           while (nz--) {
695:             idx   = 5 * (*vi++);
696:             xw[0] = x[idx];
697:             xw[1] = x[1 + idx];
698:             xw[2] = x[2 + idx];
699:             xw[3] = x[3 + idx];
700:             xw[4] = x[4 + idx];
701:             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702:             v += 25;
703:           }
704:           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705:           x[i2]     = xw[0];
706:           x[i2 + 1] = xw[1];
707:           x[i2 + 2] = xw[2];
708:           x[i2 + 3] = xw[3];
709:           x[i2 + 4] = xw[4];
710:           idiag -= 25;
711:           i2 -= 5;
712:         }
713:         break;
714:       case 6:
715:         s[0] = xb[i2];
716:         s[1] = xb[i2 + 1];
717:         s[2] = xb[i2 + 2];
718:         s[3] = xb[i2 + 3];
719:         s[4] = xb[i2 + 4];
720:         s[5] = xb[i2 + 5];
721:         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722:         x[i2]     = xw[0];
723:         x[i2 + 1] = xw[1];
724:         x[i2 + 2] = xw[2];
725:         x[i2 + 3] = xw[3];
726:         x[i2 + 4] = xw[4];
727:         x[i2 + 5] = xw[5];
728:         i2 -= 6;
729:         idiag -= 36;
730:         for (i = m - 2; i >= 0; i--) {
731:           v    = aa + 36 * (diag[i] + 1);
732:           vi   = aj + diag[i] + 1;
733:           nz   = ai[i + 1] - diag[i] - 1;
734:           s[0] = xb[i2];
735:           s[1] = xb[i2 + 1];
736:           s[2] = xb[i2 + 2];
737:           s[3] = xb[i2 + 3];
738:           s[4] = xb[i2 + 4];
739:           s[5] = xb[i2 + 5];
740:           while (nz--) {
741:             idx   = 6 * (*vi++);
742:             xw[0] = x[idx];
743:             xw[1] = x[1 + idx];
744:             xw[2] = x[2 + idx];
745:             xw[3] = x[3 + idx];
746:             xw[4] = x[4 + idx];
747:             xw[5] = x[5 + idx];
748:             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749:             v += 36;
750:           }
751:           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752:           x[i2]     = xw[0];
753:           x[i2 + 1] = xw[1];
754:           x[i2 + 2] = xw[2];
755:           x[i2 + 3] = xw[3];
756:           x[i2 + 4] = xw[4];
757:           x[i2 + 5] = xw[5];
758:           idiag -= 36;
759:           i2 -= 6;
760:         }
761:         break;
762:       case 7:
763:         s[0] = xb[i2];
764:         s[1] = xb[i2 + 1];
765:         s[2] = xb[i2 + 2];
766:         s[3] = xb[i2 + 3];
767:         s[4] = xb[i2 + 4];
768:         s[5] = xb[i2 + 5];
769:         s[6] = xb[i2 + 6];
770:         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771:         x[i2]     = xw[0];
772:         x[i2 + 1] = xw[1];
773:         x[i2 + 2] = xw[2];
774:         x[i2 + 3] = xw[3];
775:         x[i2 + 4] = xw[4];
776:         x[i2 + 5] = xw[5];
777:         x[i2 + 6] = xw[6];
778:         i2 -= 7;
779:         idiag -= 49;
780:         for (i = m - 2; i >= 0; i--) {
781:           v    = aa + 49 * (diag[i] + 1);
782:           vi   = aj + diag[i] + 1;
783:           nz   = ai[i + 1] - diag[i] - 1;
784:           s[0] = xb[i2];
785:           s[1] = xb[i2 + 1];
786:           s[2] = xb[i2 + 2];
787:           s[3] = xb[i2 + 3];
788:           s[4] = xb[i2 + 4];
789:           s[5] = xb[i2 + 5];
790:           s[6] = xb[i2 + 6];
791:           while (nz--) {
792:             idx   = 7 * (*vi++);
793:             xw[0] = x[idx];
794:             xw[1] = x[1 + idx];
795:             xw[2] = x[2 + idx];
796:             xw[3] = x[3 + idx];
797:             xw[4] = x[4 + idx];
798:             xw[5] = x[5 + idx];
799:             xw[6] = x[6 + idx];
800:             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801:             v += 49;
802:           }
803:           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804:           x[i2]     = xw[0];
805:           x[i2 + 1] = xw[1];
806:           x[i2 + 2] = xw[2];
807:           x[i2 + 3] = xw[3];
808:           x[i2 + 4] = xw[4];
809:           x[i2 + 5] = xw[5];
810:           x[i2 + 6] = xw[6];
811:           idiag -= 49;
812:           i2 -= 7;
813:         }
814:         break;
815:       default:
816:         PetscCall(PetscArraycpy(w, xb + i2, bs));
817:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818:         i2 -= bs;
819:         idiag -= bs2;
820:         for (i = m - 2; i >= 0; i--) {
821:           v  = aa + bs2 * (diag[i] + 1);
822:           vi = aj + diag[i] + 1;
823:           nz = ai[i + 1] - diag[i] - 1;

825:           PetscCall(PetscArraycpy(w, xb + i2, bs));
826:           /* copy all rows of x that are needed into contiguous space */
827:           workt = work;
828:           for (j = 0; j < nz; j++) {
829:             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830:             workt += bs;
831:           }
832:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);

835:           idiag -= bs2;
836:           i2 -= bs;
837:         }
838:         break;
839:       }
840:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841:     }
842:     its--;
843:   }
844:   while (its--) {
845:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846:       idiag = a->idiag;
847:       i2    = 0;
848:       switch (bs) {
849:       case 1:
850:         for (i = 0; i < m; i++) {
851:           v    = aa + ai[i];
852:           vi   = aj + ai[i];
853:           nz   = ai[i + 1] - ai[i];
854:           s[0] = b[i2];
855:           for (j = 0; j < nz; j++) {
856:             xw[0] = x[vi[j]];
857:             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858:           }
859:           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860:           x[i2] += xw[0];
861:           idiag += 1;
862:           i2 += 1;
863:         }
864:         break;
865:       case 2:
866:         for (i = 0; i < m; i++) {
867:           v    = aa + 4 * ai[i];
868:           vi   = aj + ai[i];
869:           nz   = ai[i + 1] - ai[i];
870:           s[0] = b[i2];
871:           s[1] = b[i2 + 1];
872:           for (j = 0; j < nz; j++) {
873:             idx   = 2 * vi[j];
874:             it    = 4 * j;
875:             xw[0] = x[idx];
876:             xw[1] = x[1 + idx];
877:             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878:           }
879:           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880:           x[i2] += xw[0];
881:           x[i2 + 1] += xw[1];
882:           idiag += 4;
883:           i2 += 2;
884:         }
885:         break;
886:       case 3:
887:         for (i = 0; i < m; i++) {
888:           v    = aa + 9 * ai[i];
889:           vi   = aj + ai[i];
890:           nz   = ai[i + 1] - ai[i];
891:           s[0] = b[i2];
892:           s[1] = b[i2 + 1];
893:           s[2] = b[i2 + 2];
894:           while (nz--) {
895:             idx   = 3 * (*vi++);
896:             xw[0] = x[idx];
897:             xw[1] = x[1 + idx];
898:             xw[2] = x[2 + idx];
899:             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900:             v += 9;
901:           }
902:           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903:           x[i2] += xw[0];
904:           x[i2 + 1] += xw[1];
905:           x[i2 + 2] += xw[2];
906:           idiag += 9;
907:           i2 += 3;
908:         }
909:         break;
910:       case 4:
911:         for (i = 0; i < m; i++) {
912:           v    = aa + 16 * ai[i];
913:           vi   = aj + ai[i];
914:           nz   = ai[i + 1] - ai[i];
915:           s[0] = b[i2];
916:           s[1] = b[i2 + 1];
917:           s[2] = b[i2 + 2];
918:           s[3] = b[i2 + 3];
919:           while (nz--) {
920:             idx   = 4 * (*vi++);
921:             xw[0] = x[idx];
922:             xw[1] = x[1 + idx];
923:             xw[2] = x[2 + idx];
924:             xw[3] = x[3 + idx];
925:             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926:             v += 16;
927:           }
928:           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929:           x[i2] += xw[0];
930:           x[i2 + 1] += xw[1];
931:           x[i2 + 2] += xw[2];
932:           x[i2 + 3] += xw[3];
933:           idiag += 16;
934:           i2 += 4;
935:         }
936:         break;
937:       case 5:
938:         for (i = 0; i < m; i++) {
939:           v    = aa + 25 * ai[i];
940:           vi   = aj + ai[i];
941:           nz   = ai[i + 1] - ai[i];
942:           s[0] = b[i2];
943:           s[1] = b[i2 + 1];
944:           s[2] = b[i2 + 2];
945:           s[3] = b[i2 + 3];
946:           s[4] = b[i2 + 4];
947:           while (nz--) {
948:             idx   = 5 * (*vi++);
949:             xw[0] = x[idx];
950:             xw[1] = x[1 + idx];
951:             xw[2] = x[2 + idx];
952:             xw[3] = x[3 + idx];
953:             xw[4] = x[4 + idx];
954:             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955:             v += 25;
956:           }
957:           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958:           x[i2] += xw[0];
959:           x[i2 + 1] += xw[1];
960:           x[i2 + 2] += xw[2];
961:           x[i2 + 3] += xw[3];
962:           x[i2 + 4] += xw[4];
963:           idiag += 25;
964:           i2 += 5;
965:         }
966:         break;
967:       case 6:
968:         for (i = 0; i < m; i++) {
969:           v    = aa + 36 * ai[i];
970:           vi   = aj + ai[i];
971:           nz   = ai[i + 1] - ai[i];
972:           s[0] = b[i2];
973:           s[1] = b[i2 + 1];
974:           s[2] = b[i2 + 2];
975:           s[3] = b[i2 + 3];
976:           s[4] = b[i2 + 4];
977:           s[5] = b[i2 + 5];
978:           while (nz--) {
979:             idx   = 6 * (*vi++);
980:             xw[0] = x[idx];
981:             xw[1] = x[1 + idx];
982:             xw[2] = x[2 + idx];
983:             xw[3] = x[3 + idx];
984:             xw[4] = x[4 + idx];
985:             xw[5] = x[5 + idx];
986:             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987:             v += 36;
988:           }
989:           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990:           x[i2] += xw[0];
991:           x[i2 + 1] += xw[1];
992:           x[i2 + 2] += xw[2];
993:           x[i2 + 3] += xw[3];
994:           x[i2 + 4] += xw[4];
995:           x[i2 + 5] += xw[5];
996:           idiag += 36;
997:           i2 += 6;
998:         }
999:         break;
1000:       case 7:
1001:         for (i = 0; i < m; i++) {
1002:           v    = aa + 49 * ai[i];
1003:           vi   = aj + ai[i];
1004:           nz   = ai[i + 1] - ai[i];
1005:           s[0] = b[i2];
1006:           s[1] = b[i2 + 1];
1007:           s[2] = b[i2 + 2];
1008:           s[3] = b[i2 + 3];
1009:           s[4] = b[i2 + 4];
1010:           s[5] = b[i2 + 5];
1011:           s[6] = b[i2 + 6];
1012:           while (nz--) {
1013:             idx   = 7 * (*vi++);
1014:             xw[0] = x[idx];
1015:             xw[1] = x[1 + idx];
1016:             xw[2] = x[2 + idx];
1017:             xw[3] = x[3 + idx];
1018:             xw[4] = x[4 + idx];
1019:             xw[5] = x[5 + idx];
1020:             xw[6] = x[6 + idx];
1021:             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022:             v += 49;
1023:           }
1024:           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025:           x[i2] += xw[0];
1026:           x[i2 + 1] += xw[1];
1027:           x[i2 + 2] += xw[2];
1028:           x[i2 + 3] += xw[3];
1029:           x[i2 + 4] += xw[4];
1030:           x[i2 + 5] += xw[5];
1031:           x[i2 + 6] += xw[6];
1032:           idiag += 49;
1033:           i2 += 7;
1034:         }
1035:         break;
1036:       default:
1037:         for (i = 0; i < m; i++) {
1038:           v  = aa + bs2 * ai[i];
1039:           vi = aj + ai[i];
1040:           nz = ai[i + 1] - ai[i];

1042:           PetscCall(PetscArraycpy(w, b + i2, bs));
1043:           /* copy all rows of x that are needed into contiguous space */
1044:           workt = work;
1045:           for (j = 0; j < nz; j++) {
1046:             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047:             workt += bs;
1048:           }
1049:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050:           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);

1052:           idiag += bs2;
1053:           i2 += bs;
1054:         }
1055:         break;
1056:       }
1057:       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058:     }
1059:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060:       idiag = a->idiag + bs2 * (a->mbs - 1);
1061:       i2    = bs * (m - 1);
1062:       switch (bs) {
1063:       case 1:
1064:         for (i = m - 1; i >= 0; i--) {
1065:           v    = aa + ai[i];
1066:           vi   = aj + ai[i];
1067:           nz   = ai[i + 1] - ai[i];
1068:           s[0] = b[i2];
1069:           for (j = 0; j < nz; j++) {
1070:             xw[0] = x[vi[j]];
1071:             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072:           }
1073:           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074:           x[i2] += xw[0];
1075:           idiag -= 1;
1076:           i2 -= 1;
1077:         }
1078:         break;
1079:       case 2:
1080:         for (i = m - 1; i >= 0; i--) {
1081:           v    = aa + 4 * ai[i];
1082:           vi   = aj + ai[i];
1083:           nz   = ai[i + 1] - ai[i];
1084:           s[0] = b[i2];
1085:           s[1] = b[i2 + 1];
1086:           for (j = 0; j < nz; j++) {
1087:             idx   = 2 * vi[j];
1088:             it    = 4 * j;
1089:             xw[0] = x[idx];
1090:             xw[1] = x[1 + idx];
1091:             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092:           }
1093:           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094:           x[i2] += xw[0];
1095:           x[i2 + 1] += xw[1];
1096:           idiag -= 4;
1097:           i2 -= 2;
1098:         }
1099:         break;
1100:       case 3:
1101:         for (i = m - 1; i >= 0; i--) {
1102:           v    = aa + 9 * ai[i];
1103:           vi   = aj + ai[i];
1104:           nz   = ai[i + 1] - ai[i];
1105:           s[0] = b[i2];
1106:           s[1] = b[i2 + 1];
1107:           s[2] = b[i2 + 2];
1108:           while (nz--) {
1109:             idx   = 3 * (*vi++);
1110:             xw[0] = x[idx];
1111:             xw[1] = x[1 + idx];
1112:             xw[2] = x[2 + idx];
1113:             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114:             v += 9;
1115:           }
1116:           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117:           x[i2] += xw[0];
1118:           x[i2 + 1] += xw[1];
1119:           x[i2 + 2] += xw[2];
1120:           idiag -= 9;
1121:           i2 -= 3;
1122:         }
1123:         break;
1124:       case 4:
1125:         for (i = m - 1; i >= 0; i--) {
1126:           v    = aa + 16 * ai[i];
1127:           vi   = aj + ai[i];
1128:           nz   = ai[i + 1] - ai[i];
1129:           s[0] = b[i2];
1130:           s[1] = b[i2 + 1];
1131:           s[2] = b[i2 + 2];
1132:           s[3] = b[i2 + 3];
1133:           while (nz--) {
1134:             idx   = 4 * (*vi++);
1135:             xw[0] = x[idx];
1136:             xw[1] = x[1 + idx];
1137:             xw[2] = x[2 + idx];
1138:             xw[3] = x[3 + idx];
1139:             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140:             v += 16;
1141:           }
1142:           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143:           x[i2] += xw[0];
1144:           x[i2 + 1] += xw[1];
1145:           x[i2 + 2] += xw[2];
1146:           x[i2 + 3] += xw[3];
1147:           idiag -= 16;
1148:           i2 -= 4;
1149:         }
1150:         break;
1151:       case 5:
1152:         for (i = m - 1; i >= 0; i--) {
1153:           v    = aa + 25 * ai[i];
1154:           vi   = aj + ai[i];
1155:           nz   = ai[i + 1] - ai[i];
1156:           s[0] = b[i2];
1157:           s[1] = b[i2 + 1];
1158:           s[2] = b[i2 + 2];
1159:           s[3] = b[i2 + 3];
1160:           s[4] = b[i2 + 4];
1161:           while (nz--) {
1162:             idx   = 5 * (*vi++);
1163:             xw[0] = x[idx];
1164:             xw[1] = x[1 + idx];
1165:             xw[2] = x[2 + idx];
1166:             xw[3] = x[3 + idx];
1167:             xw[4] = x[4 + idx];
1168:             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169:             v += 25;
1170:           }
1171:           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172:           x[i2] += xw[0];
1173:           x[i2 + 1] += xw[1];
1174:           x[i2 + 2] += xw[2];
1175:           x[i2 + 3] += xw[3];
1176:           x[i2 + 4] += xw[4];
1177:           idiag -= 25;
1178:           i2 -= 5;
1179:         }
1180:         break;
1181:       case 6:
1182:         for (i = m - 1; i >= 0; i--) {
1183:           v    = aa + 36 * ai[i];
1184:           vi   = aj + ai[i];
1185:           nz   = ai[i + 1] - ai[i];
1186:           s[0] = b[i2];
1187:           s[1] = b[i2 + 1];
1188:           s[2] = b[i2 + 2];
1189:           s[3] = b[i2 + 3];
1190:           s[4] = b[i2 + 4];
1191:           s[5] = b[i2 + 5];
1192:           while (nz--) {
1193:             idx   = 6 * (*vi++);
1194:             xw[0] = x[idx];
1195:             xw[1] = x[1 + idx];
1196:             xw[2] = x[2 + idx];
1197:             xw[3] = x[3 + idx];
1198:             xw[4] = x[4 + idx];
1199:             xw[5] = x[5 + idx];
1200:             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201:             v += 36;
1202:           }
1203:           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204:           x[i2] += xw[0];
1205:           x[i2 + 1] += xw[1];
1206:           x[i2 + 2] += xw[2];
1207:           x[i2 + 3] += xw[3];
1208:           x[i2 + 4] += xw[4];
1209:           x[i2 + 5] += xw[5];
1210:           idiag -= 36;
1211:           i2 -= 6;
1212:         }
1213:         break;
1214:       case 7:
1215:         for (i = m - 1; i >= 0; i--) {
1216:           v    = aa + 49 * ai[i];
1217:           vi   = aj + ai[i];
1218:           nz   = ai[i + 1] - ai[i];
1219:           s[0] = b[i2];
1220:           s[1] = b[i2 + 1];
1221:           s[2] = b[i2 + 2];
1222:           s[3] = b[i2 + 3];
1223:           s[4] = b[i2 + 4];
1224:           s[5] = b[i2 + 5];
1225:           s[6] = b[i2 + 6];
1226:           while (nz--) {
1227:             idx   = 7 * (*vi++);
1228:             xw[0] = x[idx];
1229:             xw[1] = x[1 + idx];
1230:             xw[2] = x[2 + idx];
1231:             xw[3] = x[3 + idx];
1232:             xw[4] = x[4 + idx];
1233:             xw[5] = x[5 + idx];
1234:             xw[6] = x[6 + idx];
1235:             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236:             v += 49;
1237:           }
1238:           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239:           x[i2] += xw[0];
1240:           x[i2 + 1] += xw[1];
1241:           x[i2 + 2] += xw[2];
1242:           x[i2 + 3] += xw[3];
1243:           x[i2 + 4] += xw[4];
1244:           x[i2 + 5] += xw[5];
1245:           x[i2 + 6] += xw[6];
1246:           idiag -= 49;
1247:           i2 -= 7;
1248:         }
1249:         break;
1250:       default:
1251:         for (i = m - 1; i >= 0; i--) {
1252:           v  = aa + bs2 * ai[i];
1253:           vi = aj + ai[i];
1254:           nz = ai[i + 1] - ai[i];

1256:           PetscCall(PetscArraycpy(w, b + i2, bs));
1257:           /* copy all rows of x that are needed into contiguous space */
1258:           workt = work;
1259:           for (j = 0; j < nz; j++) {
1260:             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261:             workt += bs;
1262:           }
1263:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264:           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);

1266:           idiag -= bs2;
1267:           i2 -= bs;
1268:         }
1269:         break;
1270:       }
1271:       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272:     }
1273:   }
1274:   PetscCall(VecRestoreArray(xx, &x));
1275:   PetscCall(VecRestoreArrayRead(bb, &b));
1276:   PetscFunctionReturn(PETSC_SUCCESS);
1277: }

1279: /*
1280:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281: */
1282: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283:   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285:   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286: #endif

1288: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289: {
1290:   Mat                A = *AA;
1291:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292:   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293:   PetscInt          *ai = a->i, *ailen = a->ilen;
1294:   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295:   const PetscScalar *value = v;
1296:   MatScalar         *ap, *aa = a->a, *bap;

1298:   PetscFunctionBegin;
1299:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300:   stepval = (n - 1) * 4;
1301:   for (k = 0; k < m; k++) { /* loop over added rows */
1302:     row  = im[k];
1303:     rp   = aj + ai[row];
1304:     ap   = aa + 16 * ai[row];
1305:     nrow = ailen[row];
1306:     low  = 0;
1307:     high = nrow;
1308:     for (l = 0; l < n; l++) { /* loop over added columns */
1309:       col = in[l];
1310:       if (col <= lastcol) low = 0;
1311:       else high = nrow;
1312:       lastcol = col;
1313:       value   = v + k * (stepval + 4 + l) * 4;
1314:       while (high - low > 7) {
1315:         t = (low + high) / 2;
1316:         if (rp[t] > col) high = t;
1317:         else low = t;
1318:       }
1319:       for (i = low; i < high; i++) {
1320:         if (rp[i] > col) break;
1321:         if (rp[i] == col) {
1322:           bap = ap + 16 * i;
1323:           for (ii = 0; ii < 4; ii++, value += stepval) {
1324:             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325:           }
1326:           goto noinsert2;
1327:         }
1328:       }
1329:       N = nrow++ - 1;
1330:       high++; /* added new column index thus must search to one higher than before */
1331:       /* shift up all the later entries in this row */
1332:       for (ii = N; ii >= i; ii--) {
1333:         rp[ii + 1] = rp[ii];
1334:         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335:       }
1336:       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337:       rp[i] = col;
1338:       bap   = ap + 16 * i;
1339:       for (ii = 0; ii < 4; ii++, value += stepval) {
1340:         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341:       }
1342:     noinsert2:;
1343:       low = i;
1344:     }
1345:     ailen[row] = nrow;
1346:   }
1347:   PetscFunctionReturnVoid();
1348: }

1350: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351:   #define matsetvalues4_ MATSETVALUES4
1352: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353:   #define matsetvalues4_ matsetvalues4
1354: #endif

1356: PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357: {
1358:   Mat          A = *AA;
1359:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360:   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361:   PetscInt    *ai = a->i, *ailen = a->ilen;
1362:   PetscInt    *aj = a->j, brow, bcol;
1363:   PetscInt     ridx, cidx, lastcol = -1;
1364:   MatScalar   *ap, value, *aa      = a->a, *bap;

1366:   PetscFunctionBegin;
1367:   for (k = 0; k < m; k++) { /* loop over added rows */
1368:     row  = im[k];
1369:     brow = row / 4;
1370:     rp   = aj + ai[brow];
1371:     ap   = aa + 16 * ai[brow];
1372:     nrow = ailen[brow];
1373:     low  = 0;
1374:     high = nrow;
1375:     for (l = 0; l < n; l++) { /* loop over added columns */
1376:       col   = in[l];
1377:       bcol  = col / 4;
1378:       ridx  = row % 4;
1379:       cidx  = col % 4;
1380:       value = v[l + k * n];
1381:       if (col <= lastcol) low = 0;
1382:       else high = nrow;
1383:       lastcol = col;
1384:       while (high - low > 7) {
1385:         t = (low + high) / 2;
1386:         if (rp[t] > bcol) high = t;
1387:         else low = t;
1388:       }
1389:       for (i = low; i < high; i++) {
1390:         if (rp[i] > bcol) break;
1391:         if (rp[i] == bcol) {
1392:           bap = ap + 16 * i + 4 * cidx + ridx;
1393:           *bap += value;
1394:           goto noinsert1;
1395:         }
1396:       }
1397:       N = nrow++ - 1;
1398:       high++; /* added new column thus must search to one higher than before */
1399:       /* shift up all the later entries in this row */
1400:       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401:       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402:       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403:       rp[i]                        = bcol;
1404:       ap[16 * i + 4 * cidx + ridx] = value;
1405:     noinsert1:;
1406:       low = i;
1407:     }
1408:     ailen[brow] = nrow;
1409:   }
1410:   PetscFunctionReturnVoid();
1411: }

1413: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414: {
1415:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1416:   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1417:   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;

1419:   PetscFunctionBegin;
1420:   *nn = n;
1421:   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1422:   if (symmetric) {
1423:     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424:     nz = tia[n];
1425:   } else {
1426:     tia = a->i;
1427:     tja = a->j;
1428:   }

1430:   if (!blockcompressed && bs > 1) {
1431:     (*nn) *= bs;
1432:     /* malloc & create the natural set of indices */
1433:     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1434:     if (n) {
1435:       (*ia)[0] = oshift;
1436:       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1437:     }

1439:     for (i = 1; i < n; i++) {
1440:       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441:       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1442:     }
1443:     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];

1445:     if (inja) {
1446:       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1447:       cnt = 0;
1448:       for (i = 0; i < n; i++) {
1449:         for (j = 0; j < bs; j++) {
1450:           for (k = tia[i]; k < tia[i + 1]; k++) {
1451:             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1452:           }
1453:         }
1454:       }
1455:     }

1457:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1458:       PetscCall(PetscFree(tia));
1459:       PetscCall(PetscFree(tja));
1460:     }
1461:   } else if (oshift == 1) {
1462:     if (symmetric) {
1463:       nz = tia[A->rmap->n / bs];
1464:       /*  add 1 to i and j indices */
1465:       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466:       *ia = tia;
1467:       if (ja) {
1468:         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469:         *ja = tja;
1470:       }
1471:     } else {
1472:       nz = a->i[A->rmap->n / bs];
1473:       /* malloc space and  add 1 to i and j indices */
1474:       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475:       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476:       if (ja) {
1477:         PetscCall(PetscMalloc1(nz, ja));
1478:         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479:       }
1480:     }
1481:   } else {
1482:     *ia = tia;
1483:     if (ja) *ja = tja;
1484:   }
1485:   PetscFunctionReturn(PETSC_SUCCESS);
1486: }

1488: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489: {
1490:   PetscFunctionBegin;
1491:   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1493:     PetscCall(PetscFree(*ia));
1494:     if (ja) PetscCall(PetscFree(*ja));
1495:   }
1496:   PetscFunctionReturn(PETSC_SUCCESS);
1497: }

1499: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500: {
1501:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;

1503:   PetscFunctionBegin;
1504:   if (A->hash_active) {
1505:     PetscInt bs;
1506:     A->ops[0] = a->cops;
1507:     PetscCall(PetscHMapIJVDestroy(&a->ht));
1508:     PetscCall(MatGetBlockSize(A, &bs));
1509:     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510:     PetscCall(PetscFree(a->dnz));
1511:     PetscCall(PetscFree(a->bdnz));
1512:     A->hash_active = PETSC_FALSE;
1513:   }
1514:   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1515:   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1516:   PetscCall(ISDestroy(&a->row));
1517:   PetscCall(ISDestroy(&a->col));
1518:   PetscCall(PetscFree(a->diag));
1519:   PetscCall(PetscFree(a->idiag));
1520:   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1521:   PetscCall(PetscFree(a->solve_work));
1522:   PetscCall(PetscFree(a->mult_work));
1523:   PetscCall(PetscFree(a->sor_workt));
1524:   PetscCall(PetscFree(a->sor_work));
1525:   PetscCall(ISDestroy(&a->icol));
1526:   PetscCall(PetscFree(a->saved_values));
1527:   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));

1529:   PetscCall(MatDestroy(&a->sbaijMat));
1530:   PetscCall(MatDestroy(&a->parent));
1531:   PetscCall(PetscFree(A->data));

1533:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1534:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1535:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1536:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1537:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1538:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1539:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1540:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1541:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1542:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1543:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1544:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1545: #if defined(PETSC_HAVE_HYPRE)
1546:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1547: #endif
1548:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1549:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1550:   PetscFunctionReturn(PETSC_SUCCESS);
1551: }

1553: static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554: {
1555:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;

1557:   PetscFunctionBegin;
1558:   switch (op) {
1559:   case MAT_ROW_ORIENTED:
1560:     a->roworiented = flg;
1561:     break;
1562:   case MAT_KEEP_NONZERO_PATTERN:
1563:     a->keepnonzeropattern = flg;
1564:     break;
1565:   case MAT_NEW_NONZERO_LOCATIONS:
1566:     a->nonew = (flg ? 0 : 1);
1567:     break;
1568:   case MAT_NEW_NONZERO_LOCATION_ERR:
1569:     a->nonew = (flg ? -1 : 0);
1570:     break;
1571:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572:     a->nonew = (flg ? -2 : 0);
1573:     break;
1574:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575:     a->nounused = (flg ? -1 : 0);
1576:     break;
1577:   default:
1578:     break;
1579:   }
1580:   PetscFunctionReturn(PETSC_SUCCESS);
1581: }

1583: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1584: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585: {
1586:   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1587:   MatScalar   *aa_i;
1588:   PetscScalar *v_i;

1590:   PetscFunctionBegin;
1591:   bs  = A->rmap->bs;
1592:   bs2 = bs * bs;
1593:   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

1595:   bn  = row / bs; /* Block number */
1596:   bp  = row % bs; /* Block Position */
1597:   M   = ai[bn + 1] - ai[bn];
1598:   *nz = bs * M;

1600:   if (v) {
1601:     *v = NULL;
1602:     if (*nz) {
1603:       PetscCall(PetscMalloc1(*nz, v));
1604:       for (i = 0; i < M; i++) { /* for each block in the block row */
1605:         v_i  = *v + i * bs;
1606:         aa_i = aa + bs2 * (ai[bn] + i);
1607:         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1608:       }
1609:     }
1610:   }

1612:   if (idx) {
1613:     *idx = NULL;
1614:     if (*nz) {
1615:       PetscCall(PetscMalloc1(*nz, idx));
1616:       for (i = 0; i < M; i++) { /* for each block in the block row */
1617:         idx_i = *idx + i * bs;
1618:         itmp  = bs * aj[ai[bn] + i];
1619:         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1620:       }
1621:     }
1622:   }
1623:   PetscFunctionReturn(PETSC_SUCCESS);
1624: }

1626: PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627: {
1628:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;

1630:   PetscFunctionBegin;
1631:   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1632:   PetscFunctionReturn(PETSC_SUCCESS);
1633: }

1635: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636: {
1637:   PetscFunctionBegin;
1638:   if (idx) PetscCall(PetscFree(*idx));
1639:   if (v) PetscCall(PetscFree(*v));
1640:   PetscFunctionReturn(PETSC_SUCCESS);
1641: }

1643: static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644: {
1645:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1646:   Mat          C;
1647:   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1648:   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1649:   MatScalar   *ata, *aa = a->a;

1651:   PetscFunctionBegin;
1652:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1653:   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1655:     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */

1657:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1658:     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1659:     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1660:     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));

1662:     at  = (Mat_SeqBAIJ *)C->data;
1663:     ati = at->i;
1664:     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665:   } else {
1666:     C   = *B;
1667:     at  = (Mat_SeqBAIJ *)C->data;
1668:     ati = at->i;
1669:   }

1671:   atj = at->j;
1672:   ata = at->a;

1674:   /* Copy ati into atfill so we have locations of the next free space in atj */
1675:   PetscCall(PetscArraycpy(atfill, ati, nbs));

1677:   /* Walk through A row-wise and mark nonzero entries of A^T. */
1678:   for (i = 0; i < mbs; i++) {
1679:     anzj = ai[i + 1] - ai[i];
1680:     for (j = 0; j < anzj; j++) {
1681:       atj[atfill[*aj]] = i;
1682:       for (kr = 0; kr < bs; kr++) {
1683:         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1684:       }
1685:       atfill[*aj++] += 1;
1686:     }
1687:   }
1688:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1689:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

1691:   /* Clean up temporary space and complete requests. */
1692:   PetscCall(PetscFree(atfill));

1694:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1695:     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1696:     *B = C;
1697:   } else {
1698:     PetscCall(MatHeaderMerge(A, &C));
1699:   }
1700:   PetscFunctionReturn(PETSC_SUCCESS);
1701: }

1703: static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1704: {
1705:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;

1707:   PetscFunctionBegin;
1708:   /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1709:   if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1710:     *flg = PETSC_FALSE;
1711:     PetscFunctionReturn(PETSC_SUCCESS);
1712:   }

1714:   /* if the a->i are the same */
1715:   PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1716:   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);

1718:   /* if a->j are the same */
1719:   PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1720:   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);

1722:   if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1723:   else {
1724:     *flg = PETSC_TRUE;
1725:     for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1726:       if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1727:   }
1728:   PetscFunctionReturn(PETSC_SUCCESS);
1729: }

1731: static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732: {
1733:   Mat Btrans;

1735:   PetscFunctionBegin;
1736:   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1737:   PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
1738:   PetscCall(MatDestroy(&Btrans));
1739:   PetscFunctionReturn(PETSC_SUCCESS);
1740: }

1742: static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1743: {
1744:   PetscFunctionBegin;
1745:   PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1746:   PetscFunctionReturn(PETSC_SUCCESS);
1747: }

1749: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1750: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751: {
1752:   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753:   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754:   PetscInt    *rowlens, *colidxs;
1755:   PetscScalar *matvals;

1757:   PetscFunctionBegin;
1758:   PetscCall(PetscViewerSetUp(viewer));

1760:   M  = mat->rmap->N;
1761:   N  = mat->cmap->N;
1762:   m  = mat->rmap->n;
1763:   bs = mat->rmap->bs;
1764:   nz = bs * bs * A->nz;

1766:   /* write matrix header */
1767:   header[0] = MAT_FILE_CLASSID;
1768:   header[1] = M;
1769:   header[2] = N;
1770:   header[3] = nz;
1771:   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

1773:   /* store row lengths */
1774:   PetscCall(PetscMalloc1(m, &rowlens));
1775:   for (cnt = 0, i = 0; i < A->mbs; i++)
1776:     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1777:   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1778:   PetscCall(PetscFree(rowlens));

1780:   /* store column indices  */
1781:   PetscCall(PetscMalloc1(nz, &colidxs));
1782:   for (cnt = 0, i = 0; i < A->mbs; i++)
1783:     for (k = 0; k < bs; k++)
1784:       for (j = A->i[i]; j < A->i[i + 1]; j++)
1785:         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1786:   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1787:   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1788:   PetscCall(PetscFree(colidxs));

1790:   /* store nonzero values */
1791:   PetscCall(PetscMalloc1(nz, &matvals));
1792:   for (cnt = 0, i = 0; i < A->mbs; i++)
1793:     for (k = 0; k < bs; k++)
1794:       for (j = A->i[i]; j < A->i[i + 1]; j++)
1795:         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1796:   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1797:   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1798:   PetscCall(PetscFree(matvals));

1800:   /* write block size option to the viewer's .info file */
1801:   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1802:   PetscFunctionReturn(PETSC_SUCCESS);
1803: }

1805: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806: {
1807:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1808:   PetscInt     i, bs = A->rmap->bs, k;

1810:   PetscFunctionBegin;
1811:   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1812:   for (i = 0; i < a->mbs; i++) {
1813:     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1814:     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1815:     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1816:   }
1817:   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1818:   PetscFunctionReturn(PETSC_SUCCESS);
1819: }

1821: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822: {
1823:   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1824:   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825:   PetscViewerFormat format;

1827:   PetscFunctionBegin;
1828:   if (A->structure_only) {
1829:     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1830:     PetscFunctionReturn(PETSC_SUCCESS);
1831:   }

1833:   PetscCall(PetscViewerGetFormat(viewer, &format));
1834:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835:     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1836:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1837:     const char *matname;
1838:     Mat         aij;
1839:     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1840:     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1841:     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1842:     PetscCall(MatView(aij, viewer));
1843:     PetscCall(MatDestroy(&aij));
1844:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1845:     PetscFunctionReturn(PETSC_SUCCESS);
1846:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1847:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1848:     for (i = 0; i < a->mbs; i++) {
1849:       for (j = 0; j < bs; j++) {
1850:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1851:         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1852:           for (l = 0; l < bs; l++) {
1853: #if defined(PETSC_USE_COMPLEX)
1854:             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1855:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1856:             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1857:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1858:             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1859:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1860:             }
1861: #else
1862:             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1863: #endif
1864:           }
1865:         }
1866:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1867:       }
1868:     }
1869:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1870:   } else {
1871:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1872:     for (i = 0; i < a->mbs; i++) {
1873:       for (j = 0; j < bs; j++) {
1874:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1875:         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1876:           for (l = 0; l < bs; l++) {
1877: #if defined(PETSC_USE_COMPLEX)
1878:             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1879:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1880:             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1881:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1882:             } else {
1883:               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1884:             }
1885: #else
1886:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1887: #endif
1888:           }
1889:         }
1890:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1891:       }
1892:     }
1893:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1894:   }
1895:   PetscCall(PetscViewerFlush(viewer));
1896:   PetscFunctionReturn(PETSC_SUCCESS);
1897: }

1899: #include <petscdraw.h>
1900: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1901: {
1902:   Mat               A = (Mat)Aa;
1903:   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1904:   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1905:   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1906:   MatScalar        *aa;
1907:   PetscViewer       viewer;
1908:   PetscViewerFormat format;
1909:   int               color;

1911:   PetscFunctionBegin;
1912:   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1913:   PetscCall(PetscViewerGetFormat(viewer, &format));
1914:   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));

1916:   /* loop over matrix elements drawing boxes */

1918:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1919:     PetscDrawCollectiveBegin(draw);
1920:     /* Blue for negative, Cyan for zero and  Red for positive */
1921:     color = PETSC_DRAW_BLUE;
1922:     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1923:       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1924:         y_l = A->rmap->N - row - 1.0;
1925:         y_r = y_l + 1.0;
1926:         x_l = a->j[j] * bs;
1927:         x_r = x_l + 1.0;
1928:         aa  = a->a + j * bs2;
1929:         for (k = 0; k < bs; k++) {
1930:           for (l = 0; l < bs; l++) {
1931:             if (PetscRealPart(*aa++) >= 0.) continue;
1932:             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1933:           }
1934:         }
1935:       }
1936:     }
1937:     color = PETSC_DRAW_CYAN;
1938:     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1939:       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1940:         y_l = A->rmap->N - row - 1.0;
1941:         y_r = y_l + 1.0;
1942:         x_l = a->j[j] * bs;
1943:         x_r = x_l + 1.0;
1944:         aa  = a->a + j * bs2;
1945:         for (k = 0; k < bs; k++) {
1946:           for (l = 0; l < bs; l++) {
1947:             if (PetscRealPart(*aa++) != 0.) continue;
1948:             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1949:           }
1950:         }
1951:       }
1952:     }
1953:     color = PETSC_DRAW_RED;
1954:     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1955:       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1956:         y_l = A->rmap->N - row - 1.0;
1957:         y_r = y_l + 1.0;
1958:         x_l = a->j[j] * bs;
1959:         x_r = x_l + 1.0;
1960:         aa  = a->a + j * bs2;
1961:         for (k = 0; k < bs; k++) {
1962:           for (l = 0; l < bs; l++) {
1963:             if (PetscRealPart(*aa++) <= 0.) continue;
1964:             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1965:           }
1966:         }
1967:       }
1968:     }
1969:     PetscDrawCollectiveEnd(draw);
1970:   } else {
1971:     /* use contour shading to indicate magnitude of values */
1972:     /* first determine max of all nonzero values */
1973:     PetscReal minv = 0.0, maxv = 0.0;
1974:     PetscDraw popup;

1976:     for (i = 0; i < a->nz * a->bs2; i++) {
1977:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1978:     }
1979:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1980:     PetscCall(PetscDrawGetPopup(draw, &popup));
1981:     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));

1983:     PetscDrawCollectiveBegin(draw);
1984:     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1985:       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1986:         y_l = A->rmap->N - row - 1.0;
1987:         y_r = y_l + 1.0;
1988:         x_l = a->j[j] * bs;
1989:         x_r = x_l + 1.0;
1990:         aa  = a->a + j * bs2;
1991:         for (k = 0; k < bs; k++) {
1992:           for (l = 0; l < bs; l++) {
1993:             MatScalar v = *aa++;
1994:             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1995:             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1996:           }
1997:         }
1998:       }
1999:     }
2000:     PetscDrawCollectiveEnd(draw);
2001:   }
2002:   PetscFunctionReturn(PETSC_SUCCESS);
2003: }

2005: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2006: {
2007:   PetscReal xl, yl, xr, yr, w, h;
2008:   PetscDraw draw;
2009:   PetscBool isnull;

2011:   PetscFunctionBegin;
2012:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2013:   PetscCall(PetscDrawIsNull(draw, &isnull));
2014:   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);

2016:   xr = A->cmap->n;
2017:   yr = A->rmap->N;
2018:   h  = yr / 10.0;
2019:   w  = xr / 10.0;
2020:   xr += w;
2021:   yr += h;
2022:   xl = -w;
2023:   yl = -h;
2024:   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2025:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2026:   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2027:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2028:   PetscCall(PetscDrawSave(draw));
2029:   PetscFunctionReturn(PETSC_SUCCESS);
2030: }

2032: PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2033: {
2034:   PetscBool isascii, isbinary, isdraw;

2036:   PetscFunctionBegin;
2037:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2038:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2039:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2040:   if (isascii) {
2041:     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2042:   } else if (isbinary) {
2043:     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2044:   } else if (isdraw) {
2045:     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2046:   } else {
2047:     Mat B;
2048:     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2049:     PetscCall(MatView(B, viewer));
2050:     PetscCall(MatDestroy(&B));
2051:   }
2052:   PetscFunctionReturn(PETSC_SUCCESS);
2053: }

2055: PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2056: {
2057:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2058:   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2059:   PetscInt    *ai = a->i, *ailen = a->ilen;
2060:   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2061:   MatScalar   *ap, *aa = a->a;

2063:   PetscFunctionBegin;
2064:   for (k = 0; k < m; k++) { /* loop over rows */
2065:     row  = im[k];
2066:     brow = row / bs;
2067:     if (row < 0) {
2068:       v += n;
2069:       continue;
2070:     } /* negative row */
2071:     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2072:     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2073:     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2074:     nrow = ailen[brow];
2075:     for (l = 0; l < n; l++) { /* loop over columns */
2076:       if (in[l] < 0) {
2077:         v++;
2078:         continue;
2079:       } /* negative column */
2080:       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2081:       col  = in[l];
2082:       bcol = col / bs;
2083:       cidx = col % bs;
2084:       ridx = row % bs;
2085:       high = nrow;
2086:       low  = 0; /* assume unsorted */
2087:       while (high - low > 5) {
2088:         t = (low + high) / 2;
2089:         if (rp[t] > bcol) high = t;
2090:         else low = t;
2091:       }
2092:       for (i = low; i < high; i++) {
2093:         if (rp[i] > bcol) break;
2094:         if (rp[i] == bcol) {
2095:           *v++ = ap[bs2 * i + bs * cidx + ridx];
2096:           goto finished;
2097:         }
2098:       }
2099:       *v++ = 0.0;
2100:     finished:;
2101:     }
2102:   }
2103:   PetscFunctionReturn(PETSC_SUCCESS);
2104: }

2106: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2107: {
2108:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2109:   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2110:   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2111:   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2112:   PetscBool          roworiented = a->roworiented;
2113:   const PetscScalar *value       = v;
2114:   MatScalar         *ap = NULL, *aa = a->a, *bap;

2116:   PetscFunctionBegin;
2117:   if (roworiented) {
2118:     stepval = (n - 1) * bs;
2119:   } else {
2120:     stepval = (m - 1) * bs;
2121:   }
2122:   for (k = 0; k < m; k++) { /* loop over added rows */
2123:     row = im[k];
2124:     if (row < 0) continue;
2125:     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2126:     rp = aj + ai[row];
2127:     if (!A->structure_only) ap = aa + bs2 * ai[row];
2128:     rmax = imax[row];
2129:     nrow = ailen[row];
2130:     low  = 0;
2131:     high = nrow;
2132:     for (l = 0; l < n; l++) { /* loop over added columns */
2133:       if (in[l] < 0) continue;
2134:       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2135:       col = in[l];
2136:       if (!A->structure_only) {
2137:         if (roworiented) {
2138:           value = v + (k * (stepval + bs) + l) * bs;
2139:         } else {
2140:           value = v + (l * (stepval + bs) + k) * bs;
2141:         }
2142:       }
2143:       if (col <= lastcol) low = 0;
2144:       else high = nrow;
2145:       lastcol = col;
2146:       while (high - low > 7) {
2147:         t = (low + high) / 2;
2148:         if (rp[t] > col) high = t;
2149:         else low = t;
2150:       }
2151:       for (i = low; i < high; i++) {
2152:         if (rp[i] > col) break;
2153:         if (rp[i] == col) {
2154:           if (A->structure_only) goto noinsert2;
2155:           bap = ap + bs2 * i;
2156:           if (roworiented) {
2157:             if (is == ADD_VALUES) {
2158:               for (ii = 0; ii < bs; ii++, value += stepval) {
2159:                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2160:               }
2161:             } else {
2162:               for (ii = 0; ii < bs; ii++, value += stepval) {
2163:                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2164:               }
2165:             }
2166:           } else {
2167:             if (is == ADD_VALUES) {
2168:               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2169:                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2170:                 bap += bs;
2171:               }
2172:             } else {
2173:               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2174:                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2175:                 bap += bs;
2176:               }
2177:             }
2178:           }
2179:           goto noinsert2;
2180:         }
2181:       }
2182:       if (nonew == 1) goto noinsert2;
2183:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2184:       if (A->structure_only) {
2185:         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2186:       } else {
2187:         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2188:       }
2189:       N = nrow++ - 1;
2190:       high++;
2191:       /* shift up all the later entries in this row */
2192:       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2193:       rp[i] = col;
2194:       if (!A->structure_only) {
2195:         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2196:         bap = ap + bs2 * i;
2197:         if (roworiented) {
2198:           for (ii = 0; ii < bs; ii++, value += stepval) {
2199:             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2200:           }
2201:         } else {
2202:           for (ii = 0; ii < bs; ii++, value += stepval) {
2203:             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2204:           }
2205:         }
2206:       }
2207:     noinsert2:;
2208:       low = i;
2209:     }
2210:     ailen[row] = nrow;
2211:   }
2212:   PetscFunctionReturn(PETSC_SUCCESS);
2213: }

2215: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2216: {
2217:   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2218:   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2219:   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2220:   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2221:   MatScalar   *aa    = a->a, *ap;
2222:   PetscReal    ratio = 0.6;

2224:   PetscFunctionBegin;
2225:   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);

2227:   if (m) rmax = ailen[0];
2228:   for (i = 1; i < mbs; i++) {
2229:     /* move each row back by the amount of empty slots (fshift) before it*/
2230:     fshift += imax[i - 1] - ailen[i - 1];
2231:     rmax = PetscMax(rmax, ailen[i]);
2232:     if (fshift) {
2233:       ip = aj + ai[i];
2234:       ap = aa + bs2 * ai[i];
2235:       N  = ailen[i];
2236:       PetscCall(PetscArraymove(ip - fshift, ip, N));
2237:       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2238:     }
2239:     ai[i] = ai[i - 1] + ailen[i - 1];
2240:   }
2241:   if (mbs) {
2242:     fshift += imax[mbs - 1] - ailen[mbs - 1];
2243:     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2244:   }

2246:   /* reset ilen and imax for each row */
2247:   a->nonzerorowcnt = 0;
2248:   if (A->structure_only) {
2249:     PetscCall(PetscFree2(a->imax, a->ilen));
2250:   } else { /* !A->structure_only */
2251:     for (i = 0; i < mbs; i++) {
2252:       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2253:       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2254:     }
2255:   }
2256:   a->nz = ai[mbs];

2258:   /* diagonals may have moved, so kill the diagonal pointers */
2259:   a->idiagvalid = PETSC_FALSE;
2260:   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2261:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2262:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2263:   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));

2265:   A->info.mallocs += a->reallocs;
2266:   a->reallocs         = 0;
2267:   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2268:   a->rmax             = rmax;

2270:   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2271:   PetscFunctionReturn(PETSC_SUCCESS);
2272: }

2274: /*
2275:    This function returns an array of flags which indicate the locations of contiguous
2276:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2277:    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2278:    Assume: sizes should be long enough to hold all the values.
2279: */
2280: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2281: {
2282:   PetscInt j = 0;

2284:   PetscFunctionBegin;
2285:   for (PetscInt i = 0; i < n; j++) {
2286:     PetscInt row = idx[i];
2287:     if (row % bs != 0) { /* Not the beginning of a block */
2288:       sizes[j] = 1;
2289:       i++;
2290:     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2291:       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2292:       i++;
2293:     } else { /* Beginning of the block, so check if the complete block exists */
2294:       PetscBool flg = PETSC_TRUE;
2295:       for (PetscInt k = 1; k < bs; k++) {
2296:         if (row + k != idx[i + k]) { /* break in the block */
2297:           flg = PETSC_FALSE;
2298:           break;
2299:         }
2300:       }
2301:       if (flg) { /* No break in the bs */
2302:         sizes[j] = bs;
2303:         i += bs;
2304:       } else {
2305:         sizes[j] = 1;
2306:         i++;
2307:       }
2308:     }
2309:   }
2310:   *bs_max = j;
2311:   PetscFunctionReturn(PETSC_SUCCESS);
2312: }

2314: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2315: {
2316:   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2317:   PetscInt           i, j, k, count, *rows;
2318:   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2319:   PetscScalar        zero = 0.0;
2320:   MatScalar         *aa;
2321:   const PetscScalar *xx;
2322:   PetscScalar       *bb;

2324:   PetscFunctionBegin;
2325:   /* fix right-hand side if needed */
2326:   if (x && b) {
2327:     PetscCall(VecGetArrayRead(x, &xx));
2328:     PetscCall(VecGetArray(b, &bb));
2329:     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2330:     PetscCall(VecRestoreArrayRead(x, &xx));
2331:     PetscCall(VecRestoreArray(b, &bb));
2332:   }

2334:   /* Make a copy of the IS and  sort it */
2335:   /* allocate memory for rows,sizes */
2336:   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));

2338:   /* copy IS values to rows, and sort them */
2339:   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2340:   PetscCall(PetscSortInt(is_n, rows));

2342:   if (baij->keepnonzeropattern) {
2343:     for (i = 0; i < is_n; i++) sizes[i] = 1;
2344:     bs_max = is_n;
2345:   } else {
2346:     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2347:     A->nonzerostate++;
2348:   }

2350:   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2351:     row = rows[j];
2352:     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2353:     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2354:     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2355:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2356:       if (diag != (PetscScalar)0.0) {
2357:         if (baij->ilen[row / bs] > 0) {
2358:           baij->ilen[row / bs]       = 1;
2359:           baij->j[baij->i[row / bs]] = row / bs;

2361:           PetscCall(PetscArrayzero(aa, count * bs));
2362:         }
2363:         /* Now insert all the diagonal values for this bs */
2364:         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2365:       } else { /* (diag == 0.0) */
2366:         baij->ilen[row / bs] = 0;
2367:       } /* end (diag == 0.0) */
2368:     } else { /* (sizes[i] != bs) */
2369:       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2370:       for (k = 0; k < count; k++) {
2371:         aa[0] = zero;
2372:         aa += bs;
2373:       }
2374:       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2375:     }
2376:   }

2378:   PetscCall(PetscFree2(rows, sizes));
2379:   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2380:   PetscFunctionReturn(PETSC_SUCCESS);
2381: }

2383: static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2384: {
2385:   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2386:   PetscInt           i, j, k, count;
2387:   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2388:   PetscScalar        zero = 0.0;
2389:   MatScalar         *aa;
2390:   const PetscScalar *xx;
2391:   PetscScalar       *bb;
2392:   PetscBool         *zeroed, vecs = PETSC_FALSE;

2394:   PetscFunctionBegin;
2395:   /* fix right-hand side if needed */
2396:   if (x && b) {
2397:     PetscCall(VecGetArrayRead(x, &xx));
2398:     PetscCall(VecGetArray(b, &bb));
2399:     vecs = PETSC_TRUE;
2400:   }

2402:   /* zero the columns */
2403:   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2404:   for (i = 0; i < is_n; i++) {
2405:     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2406:     zeroed[is_idx[i]] = PETSC_TRUE;
2407:   }
2408:   for (i = 0; i < A->rmap->N; i++) {
2409:     if (!zeroed[i]) {
2410:       row = i / bs;
2411:       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2412:         for (k = 0; k < bs; k++) {
2413:           col = bs * baij->j[j] + k;
2414:           if (zeroed[col]) {
2415:             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2416:             if (vecs) bb[i] -= aa[0] * xx[col];
2417:             aa[0] = 0.0;
2418:           }
2419:         }
2420:       }
2421:     } else if (vecs) bb[i] = diag * xx[i];
2422:   }
2423:   PetscCall(PetscFree(zeroed));
2424:   if (vecs) {
2425:     PetscCall(VecRestoreArrayRead(x, &xx));
2426:     PetscCall(VecRestoreArray(b, &bb));
2427:   }

2429:   /* zero the rows */
2430:   for (i = 0; i < is_n; i++) {
2431:     row   = is_idx[i];
2432:     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2433:     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2434:     for (k = 0; k < count; k++) {
2435:       aa[0] = zero;
2436:       aa += bs;
2437:     }
2438:     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2439:   }
2440:   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2441:   PetscFunctionReturn(PETSC_SUCCESS);
2442: }

2444: PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2445: {
2446:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2447:   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2448:   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2449:   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2450:   PetscInt     ridx, cidx, bs2                 = a->bs2;
2451:   PetscBool    roworiented = a->roworiented;
2452:   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;

2454:   PetscFunctionBegin;
2455:   for (k = 0; k < m; k++) { /* loop over added rows */
2456:     row  = im[k];
2457:     brow = row / bs;
2458:     if (row < 0) continue;
2459:     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2460:     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2461:     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2462:     rmax = imax[brow];
2463:     nrow = ailen[brow];
2464:     low  = 0;
2465:     high = nrow;
2466:     for (l = 0; l < n; l++) { /* loop over added columns */
2467:       if (in[l] < 0) continue;
2468:       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2469:       col  = in[l];
2470:       bcol = col / bs;
2471:       ridx = row % bs;
2472:       cidx = col % bs;
2473:       if (!A->structure_only) {
2474:         if (roworiented) {
2475:           value = v[l + k * n];
2476:         } else {
2477:           value = v[k + l * m];
2478:         }
2479:       }
2480:       if (col <= lastcol) low = 0;
2481:       else high = nrow;
2482:       lastcol = col;
2483:       while (high - low > 7) {
2484:         t = (low + high) / 2;
2485:         if (rp[t] > bcol) high = t;
2486:         else low = t;
2487:       }
2488:       for (i = low; i < high; i++) {
2489:         if (rp[i] > bcol) break;
2490:         if (rp[i] == bcol) {
2491:           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2492:           if (!A->structure_only) {
2493:             if (is == ADD_VALUES) *bap += value;
2494:             else *bap = value;
2495:           }
2496:           goto noinsert1;
2497:         }
2498:       }
2499:       if (nonew == 1) goto noinsert1;
2500:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2501:       if (A->structure_only) {
2502:         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2503:       } else {
2504:         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2505:       }
2506:       N = nrow++ - 1;
2507:       high++;
2508:       /* shift up all the later entries in this row */
2509:       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2510:       rp[i] = bcol;
2511:       if (!A->structure_only) {
2512:         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2513:         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2514:         ap[bs2 * i + bs * cidx + ridx] = value;
2515:       }
2516:       a->nz++;
2517:     noinsert1:;
2518:       low = i;
2519:     }
2520:     ailen[brow] = nrow;
2521:   }
2522:   PetscFunctionReturn(PETSC_SUCCESS);
2523: }

2525: static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2526: {
2527:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2528:   Mat          outA;
2529:   PetscBool    row_identity, col_identity;

2531:   PetscFunctionBegin;
2532:   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2533:   PetscCall(ISIdentity(row, &row_identity));
2534:   PetscCall(ISIdentity(col, &col_identity));
2535:   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");

2537:   outA            = inA;
2538:   inA->factortype = MAT_FACTOR_LU;
2539:   PetscCall(PetscFree(inA->solvertype));
2540:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));

2542:   PetscCall(PetscObjectReference((PetscObject)row));
2543:   PetscCall(ISDestroy(&a->row));
2544:   a->row = row;
2545:   PetscCall(PetscObjectReference((PetscObject)col));
2546:   PetscCall(ISDestroy(&a->col));
2547:   a->col = col;

2549:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2550:   PetscCall(ISDestroy(&a->icol));
2551:   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));

2553:   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2554:   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2555:   PetscCall(MatLUFactorNumeric(outA, inA, info));
2556:   PetscFunctionReturn(PETSC_SUCCESS);
2557: }

2559: static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2560: {
2561:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;

2563:   PetscFunctionBegin;
2564:   baij->nz = baij->maxnz;
2565:   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2566:   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2567:   PetscFunctionReturn(PETSC_SUCCESS);
2568: }

2570: /*@
2571:   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.

2573:   Input Parameters:
2574: + mat     - the `MATSEQBAIJ` matrix
2575: - indices - the block column indices

2577:   Level: advanced

2579:   Notes:
2580:   This can be called if you have precomputed the nonzero structure of the
2581:   matrix and want to provide it to the matrix object to improve the performance
2582:   of the `MatSetValues()` operation.

2584:   You MUST have set the correct numbers of nonzeros per row in the call to
2585:   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.

2587:   MUST be called before any calls to `MatSetValues()`

2589: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2590: @*/
2591: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2592: {
2593:   PetscFunctionBegin;
2595:   PetscAssertPointer(indices, 2);
2596:   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2597:   PetscFunctionReturn(PETSC_SUCCESS);
2598: }

2600: static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2601: {
2602:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2603:   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2604:   PetscReal    atmp;
2605:   PetscScalar *x, zero = 0.0;
2606:   MatScalar   *aa;
2607:   PetscInt     ncols, brow, krow, kcol;

2609:   PetscFunctionBegin;
2610:   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2611:   bs  = A->rmap->bs;
2612:   aa  = a->a;
2613:   ai  = a->i;
2614:   aj  = a->j;
2615:   mbs = a->mbs;

2617:   PetscCall(VecSet(v, zero));
2618:   PetscCall(VecGetArray(v, &x));
2619:   PetscCall(VecGetLocalSize(v, &n));
2620:   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2621:   for (i = 0; i < mbs; i++) {
2622:     ncols = ai[1] - ai[0];
2623:     ai++;
2624:     brow = bs * i;
2625:     for (j = 0; j < ncols; j++) {
2626:       for (kcol = 0; kcol < bs; kcol++) {
2627:         for (krow = 0; krow < bs; krow++) {
2628:           atmp = PetscAbsScalar(*aa);
2629:           aa++;
2630:           row = brow + krow; /* row index */
2631:           if (PetscAbsScalar(x[row]) < atmp) {
2632:             x[row] = atmp;
2633:             if (idx) idx[row] = bs * (*aj) + kcol;
2634:           }
2635:         }
2636:       }
2637:       aj++;
2638:     }
2639:   }
2640:   PetscCall(VecRestoreArray(v, &x));
2641:   PetscFunctionReturn(PETSC_SUCCESS);
2642: }

2644: static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2645: {
2646:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2647:   PetscInt     i, j, n, row, bs, *ai, mbs;
2648:   PetscReal    atmp;
2649:   PetscScalar *x, zero = 0.0;
2650:   MatScalar   *aa;
2651:   PetscInt     ncols, brow, krow, kcol;

2653:   PetscFunctionBegin;
2654:   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2655:   bs  = A->rmap->bs;
2656:   aa  = a->a;
2657:   ai  = a->i;
2658:   mbs = a->mbs;

2660:   PetscCall(VecSet(v, zero));
2661:   PetscCall(VecGetArrayWrite(v, &x));
2662:   PetscCall(VecGetLocalSize(v, &n));
2663:   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2664:   for (i = 0; i < mbs; i++) {
2665:     ncols = ai[1] - ai[0];
2666:     ai++;
2667:     brow = bs * i;
2668:     for (j = 0; j < ncols; j++) {
2669:       for (kcol = 0; kcol < bs; kcol++) {
2670:         for (krow = 0; krow < bs; krow++) {
2671:           atmp = PetscAbsScalar(*aa);
2672:           aa++;
2673:           row = brow + krow; /* row index */
2674:           x[row] += atmp;
2675:         }
2676:       }
2677:     }
2678:   }
2679:   PetscCall(VecRestoreArrayWrite(v, &x));
2680:   PetscFunctionReturn(PETSC_SUCCESS);
2681: }

2683: static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2684: {
2685:   PetscFunctionBegin;
2686:   /* If the two matrices have the same copy implementation, use fast copy. */
2687:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2688:     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2689:     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2690:     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;

2692:     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2693:     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2694:     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2695:     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2696:   } else {
2697:     PetscCall(MatCopy_Basic(A, B, str));
2698:   }
2699:   PetscFunctionReturn(PETSC_SUCCESS);
2700: }

2702: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2703: {
2704:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;

2706:   PetscFunctionBegin;
2707:   *array = a->a;
2708:   PetscFunctionReturn(PETSC_SUCCESS);
2709: }

2711: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2712: {
2713:   PetscFunctionBegin;
2714:   *array = NULL;
2715:   PetscFunctionReturn(PETSC_SUCCESS);
2716: }

2718: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2719: {
2720:   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2721:   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2722:   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

2724:   PetscFunctionBegin;
2725:   /* Set the number of nonzeros in the new matrix */
2726:   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2727:   PetscFunctionReturn(PETSC_SUCCESS);
2728: }

2730: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2731: {
2732:   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2733:   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2734:   PetscBLASInt one = 1;

2736:   PetscFunctionBegin;
2737:   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2738:     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2739:     if (e) {
2740:       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2741:       if (e) {
2742:         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2743:         if (e) str = SAME_NONZERO_PATTERN;
2744:       }
2745:     }
2746:     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2747:   }
2748:   if (str == SAME_NONZERO_PATTERN) {
2749:     PetscScalar  alpha = a;
2750:     PetscBLASInt bnz;
2751:     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2752:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2753:     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2754:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2755:     PetscCall(MatAXPY_Basic(Y, a, X, str));
2756:   } else {
2757:     Mat       B;
2758:     PetscInt *nnz;
2759:     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2760:     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2761:     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2762:     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2763:     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2764:     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2765:     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2766:     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2767:     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2768:     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2769:     PetscCall(MatHeaderMerge(Y, &B));
2770:     PetscCall(PetscFree(nnz));
2771:   }
2772:   PetscFunctionReturn(PETSC_SUCCESS);
2773: }

2775: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2776: {
2777: #if PetscDefined(USE_COMPLEX)
2778:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2779:   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2780:   MatScalar   *aa = a->a;

2782:   PetscFunctionBegin;
2783:   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2784:   PetscFunctionReturn(PETSC_SUCCESS);
2785: #else
2786:   (void)A;
2787:   return PETSC_SUCCESS;
2788: #endif
2789: }

2791: static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2792: {
2793: #if PetscDefined(USE_COMPLEX)
2794:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2795:   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2796:   MatScalar   *aa = a->a;

2798:   PetscFunctionBegin;
2799:   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2800:   PetscFunctionReturn(PETSC_SUCCESS);
2801: #else
2802:   (void)A;
2803:   return PETSC_SUCCESS;
2804: #endif
2805: }

2807: static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2808: {
2809: #if PetscDefined(USE_COMPLEX)
2810:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2811:   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2812:   MatScalar   *aa = a->a;

2814:   PetscFunctionBegin;
2815:   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2816:   PetscFunctionReturn(PETSC_SUCCESS);
2817: #else
2818:   (void)A;
2819:   return PETSC_SUCCESS;
2820: #endif
2821: }

2823: /*
2824:     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2825: */
2826: static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2827: {
2828:   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2829:   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2830:   PetscInt     nz = a->i[m], row, *jj, mr, col;

2832:   PetscFunctionBegin;
2833:   *nn = n;
2834:   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2835:   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2836:   PetscCall(PetscCalloc1(n, &collengths));
2837:   PetscCall(PetscMalloc1(n + 1, &cia));
2838:   PetscCall(PetscMalloc1(nz, &cja));
2839:   jj = a->j;
2840:   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2841:   cia[0] = oshift;
2842:   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2843:   PetscCall(PetscArrayzero(collengths, n));
2844:   jj = a->j;
2845:   for (row = 0; row < m; row++) {
2846:     mr = a->i[row + 1] - a->i[row];
2847:     for (i = 0; i < mr; i++) {
2848:       col = *jj++;

2850:       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2851:     }
2852:   }
2853:   PetscCall(PetscFree(collengths));
2854:   *ia = cia;
2855:   *ja = cja;
2856:   PetscFunctionReturn(PETSC_SUCCESS);
2857: }

2859: static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2860: {
2861:   PetscFunctionBegin;
2862:   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2863:   PetscCall(PetscFree(*ia));
2864:   PetscCall(PetscFree(*ja));
2865:   PetscFunctionReturn(PETSC_SUCCESS);
2866: }

2868: /*
2869:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2870:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2871:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2872:  */
2873: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2874: {
2875:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2876:   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2877:   PetscInt     nz = a->i[m], row, *jj, mr, col;
2878:   PetscInt    *cspidx;

2880:   PetscFunctionBegin;
2881:   *nn = n;
2882:   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);

2884:   PetscCall(PetscCalloc1(n, &collengths));
2885:   PetscCall(PetscMalloc1(n + 1, &cia));
2886:   PetscCall(PetscMalloc1(nz, &cja));
2887:   PetscCall(PetscMalloc1(nz, &cspidx));
2888:   jj = a->j;
2889:   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2890:   cia[0] = oshift;
2891:   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2892:   PetscCall(PetscArrayzero(collengths, n));
2893:   jj = a->j;
2894:   for (row = 0; row < m; row++) {
2895:     mr = a->i[row + 1] - a->i[row];
2896:     for (i = 0; i < mr; i++) {
2897:       col                                         = *jj++;
2898:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2899:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2900:     }
2901:   }
2902:   PetscCall(PetscFree(collengths));
2903:   *ia    = cia;
2904:   *ja    = cja;
2905:   *spidx = cspidx;
2906:   PetscFunctionReturn(PETSC_SUCCESS);
2907: }

2909: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2910: {
2911:   PetscFunctionBegin;
2912:   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2913:   PetscCall(PetscFree(*spidx));
2914:   PetscFunctionReturn(PETSC_SUCCESS);
2915: }

2917: static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2918: {
2919:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;

2921:   PetscFunctionBegin;
2922:   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2923:   PetscCall(MatShift_Basic(Y, a));
2924:   PetscFunctionReturn(PETSC_SUCCESS);
2925: }

2927: PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2928: {
2929:   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2930:   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2931:   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2932:   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2933:   MatScalar   *aa = a->a, *ap;
2934:   PetscBool    zero;

2936:   PetscFunctionBegin;
2937:   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2938:   if (m) rmax = ailen[0];
2939:   for (i = 1; i <= mbs; i++) {
2940:     for (k = ai[i - 1]; k < ai[i]; k++) {
2941:       zero = PETSC_TRUE;
2942:       ap   = aa + bs2 * k;
2943:       for (j = 0; j < bs2 && zero; j++) {
2944:         if (ap[j] != 0.0) zero = PETSC_FALSE;
2945:       }
2946:       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2947:       else {
2948:         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2949:         aj[k - fshift] = aj[k];
2950:         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2951:       }
2952:     }
2953:     ai[i - 1] -= fshift_prev;
2954:     fshift_prev  = fshift;
2955:     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2956:     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2957:     rmax = PetscMax(rmax, ailen[i - 1]);
2958:   }
2959:   if (fshift) {
2960:     if (mbs) {
2961:       ai[mbs] -= fshift;
2962:       a->nz = ai[mbs];
2963:     }
2964:     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2965:     A->nonzerostate++;
2966:     A->info.nz_unneeded += (PetscReal)fshift;
2967:     a->rmax = rmax;
2968:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2969:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2970:   }
2971:   PetscFunctionReturn(PETSC_SUCCESS);
2972: }

2974: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2975:                                        MatGetRow_SeqBAIJ,
2976:                                        MatRestoreRow_SeqBAIJ,
2977:                                        MatMult_SeqBAIJ_N,
2978:                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2979:                                        MatMultTranspose_SeqBAIJ,
2980:                                        MatMultTransposeAdd_SeqBAIJ,
2981:                                        NULL,
2982:                                        NULL,
2983:                                        NULL,
2984:                                        /* 10*/ NULL,
2985:                                        MatLUFactor_SeqBAIJ,
2986:                                        NULL,
2987:                                        NULL,
2988:                                        MatTranspose_SeqBAIJ,
2989:                                        /* 15*/ MatGetInfo_SeqBAIJ,
2990:                                        MatEqual_SeqBAIJ,
2991:                                        MatGetDiagonal_SeqBAIJ,
2992:                                        MatDiagonalScale_SeqBAIJ,
2993:                                        MatNorm_SeqBAIJ,
2994:                                        /* 20*/ NULL,
2995:                                        MatAssemblyEnd_SeqBAIJ,
2996:                                        MatSetOption_SeqBAIJ,
2997:                                        MatZeroEntries_SeqBAIJ,
2998:                                        /* 24*/ MatZeroRows_SeqBAIJ,
2999:                                        NULL,
3000:                                        NULL,
3001:                                        NULL,
3002:                                        NULL,
3003:                                        /* 29*/ MatSetUp_Seq_Hash,
3004:                                        NULL,
3005:                                        NULL,
3006:                                        NULL,
3007:                                        NULL,
3008:                                        /* 34*/ MatDuplicate_SeqBAIJ,
3009:                                        NULL,
3010:                                        NULL,
3011:                                        MatILUFactor_SeqBAIJ,
3012:                                        NULL,
3013:                                        /* 39*/ MatAXPY_SeqBAIJ,
3014:                                        MatCreateSubMatrices_SeqBAIJ,
3015:                                        MatIncreaseOverlap_SeqBAIJ,
3016:                                        MatGetValues_SeqBAIJ,
3017:                                        MatCopy_SeqBAIJ,
3018:                                        /* 44*/ NULL,
3019:                                        MatScale_SeqBAIJ,
3020:                                        MatShift_SeqBAIJ,
3021:                                        NULL,
3022:                                        MatZeroRowsColumns_SeqBAIJ,
3023:                                        /* 49*/ NULL,
3024:                                        MatGetRowIJ_SeqBAIJ,
3025:                                        MatRestoreRowIJ_SeqBAIJ,
3026:                                        MatGetColumnIJ_SeqBAIJ,
3027:                                        MatRestoreColumnIJ_SeqBAIJ,
3028:                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3029:                                        NULL,
3030:                                        NULL,
3031:                                        NULL,
3032:                                        MatSetValuesBlocked_SeqBAIJ,
3033:                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3034:                                        MatDestroy_SeqBAIJ,
3035:                                        MatView_SeqBAIJ,
3036:                                        NULL,
3037:                                        NULL,
3038:                                        /* 64*/ NULL,
3039:                                        NULL,
3040:                                        NULL,
3041:                                        NULL,
3042:                                        MatGetRowMaxAbs_SeqBAIJ,
3043:                                        /* 69*/ NULL,
3044:                                        MatConvert_Basic,
3045:                                        NULL,
3046:                                        MatFDColoringApply_BAIJ,
3047:                                        NULL,
3048:                                        /* 74*/ NULL,
3049:                                        NULL,
3050:                                        NULL,
3051:                                        NULL,
3052:                                        MatLoad_SeqBAIJ,
3053:                                        /* 79*/ NULL,
3054:                                        NULL,
3055:                                        NULL,
3056:                                        NULL,
3057:                                        NULL,
3058:                                        /* 84*/ NULL,
3059:                                        NULL,
3060:                                        NULL,
3061:                                        NULL,
3062:                                        NULL,
3063:                                        /* 89*/ NULL,
3064:                                        NULL,
3065:                                        NULL,
3066:                                        NULL,
3067:                                        MatConjugate_SeqBAIJ,
3068:                                        /* 94*/ NULL,
3069:                                        NULL,
3070:                                        MatRealPart_SeqBAIJ,
3071:                                        MatImaginaryPart_SeqBAIJ,
3072:                                        NULL,
3073:                                        /* 99*/ NULL,
3074:                                        NULL,
3075:                                        NULL,
3076:                                        NULL,
3077:                                        NULL,
3078:                                        /*104*/ NULL,
3079:                                        NULL,
3080:                                        NULL,
3081:                                        NULL,
3082:                                        NULL,
3083:                                        /*109*/ NULL,
3084:                                        NULL,
3085:                                        MatMultHermitianTranspose_SeqBAIJ,
3086:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3087:                                        NULL,
3088:                                        /*114*/ NULL,
3089:                                        MatGetColumnReductions_SeqBAIJ,
3090:                                        MatInvertBlockDiagonal_SeqBAIJ,
3091:                                        NULL,
3092:                                        NULL,
3093:                                        /*119*/ NULL,
3094:                                        NULL,
3095:                                        NULL,
3096:                                        NULL,
3097:                                        NULL,
3098:                                        /*124*/ NULL,
3099:                                        NULL,
3100:                                        MatSetBlockSizes_Default,
3101:                                        NULL,
3102:                                        MatFDColoringSetUp_SeqXAIJ,
3103:                                        /*129*/ NULL,
3104:                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3105:                                        MatDestroySubMatrices_SeqBAIJ,
3106:                                        NULL,
3107:                                        NULL,
3108:                                        /*134*/ NULL,
3109:                                        NULL,
3110:                                        MatEliminateZeros_SeqBAIJ,
3111:                                        MatGetRowSumAbs_SeqBAIJ,
3112:                                        NULL,
3113:                                        /*139*/ NULL,
3114:                                        NULL,
3115:                                        MatCopyHashToXAIJ_Seq_Hash,
3116:                                        NULL,
3117:                                        NULL};

3119: static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3120: {
3121:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3122:   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;

3124:   PetscFunctionBegin;
3125:   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

3127:   /* allocate space for values if not already there */
3128:   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));

3130:   /* copy values over */
3131:   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3132:   PetscFunctionReturn(PETSC_SUCCESS);
3133: }

3135: static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3136: {
3137:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3138:   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;

3140:   PetscFunctionBegin;
3141:   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3142:   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");

3144:   /* copy values over */
3145:   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3146:   PetscFunctionReturn(PETSC_SUCCESS);
3147: }

3149: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3150: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);

3152: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3153: {
3154:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3155:   PetscInt     i, mbs, nbs, bs2;
3156:   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;

3158:   PetscFunctionBegin;
3159:   if (B->hash_active) {
3160:     PetscInt bs;
3161:     B->ops[0] = b->cops;
3162:     PetscCall(PetscHMapIJVDestroy(&b->ht));
3163:     PetscCall(MatGetBlockSize(B, &bs));
3164:     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3165:     PetscCall(PetscFree(b->dnz));
3166:     PetscCall(PetscFree(b->bdnz));
3167:     B->hash_active = PETSC_FALSE;
3168:   }
3169:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3170:   if (nz == MAT_SKIP_ALLOCATION) {
3171:     skipallocation = PETSC_TRUE;
3172:     nz             = 0;
3173:   }

3175:   PetscCall(MatSetBlockSize(B, bs));
3176:   PetscCall(PetscLayoutSetUp(B->rmap));
3177:   PetscCall(PetscLayoutSetUp(B->cmap));
3178:   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

3180:   B->preallocated = PETSC_TRUE;

3182:   mbs = B->rmap->n / bs;
3183:   nbs = B->cmap->n / bs;
3184:   bs2 = bs * bs;

3186:   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);

3188:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3189:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3190:   if (nnz) {
3191:     for (i = 0; i < mbs; i++) {
3192:       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3193:       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3194:     }
3195:   }

3197:   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3198:   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3199:   PetscOptionsEnd();

3201:   if (!flg) {
3202:     switch (bs) {
3203:     case 1:
3204:       B->ops->mult    = MatMult_SeqBAIJ_1;
3205:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3206:       break;
3207:     case 2:
3208:       B->ops->mult    = MatMult_SeqBAIJ_2;
3209:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3210:       break;
3211:     case 3:
3212:       B->ops->mult    = MatMult_SeqBAIJ_3;
3213:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3214:       break;
3215:     case 4:
3216:       B->ops->mult    = MatMult_SeqBAIJ_4;
3217:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3218:       break;
3219:     case 5:
3220:       B->ops->mult    = MatMult_SeqBAIJ_5;
3221:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3222:       break;
3223:     case 6:
3224:       B->ops->mult    = MatMult_SeqBAIJ_6;
3225:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3226:       break;
3227:     case 7:
3228:       B->ops->mult    = MatMult_SeqBAIJ_7;
3229:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3230:       break;
3231:     case 9: {
3232:       PetscInt version = 1;
3233:       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3234:       switch (version) {
3235: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3236:       case 1:
3237:         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3238:         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3239:         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3240:         break;
3241: #endif
3242:       default:
3243:         B->ops->mult    = MatMult_SeqBAIJ_N;
3244:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3245:         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3246:         break;
3247:       }
3248:       break;
3249:     }
3250:     case 11:
3251:       B->ops->mult    = MatMult_SeqBAIJ_11;
3252:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3253:       break;
3254:     case 12: {
3255:       PetscInt version = 1;
3256:       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3257:       switch (version) {
3258:       case 1:
3259:         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3260:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3261:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3262:         break;
3263:       case 2:
3264:         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3265:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3266:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3267:         break;
3268: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3269:       case 3:
3270:         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3271:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3272:         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3273:         break;
3274: #endif
3275:       default:
3276:         B->ops->mult    = MatMult_SeqBAIJ_N;
3277:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3278:         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3279:         break;
3280:       }
3281:       break;
3282:     }
3283:     case 15: {
3284:       PetscInt version = 1;
3285:       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3286:       switch (version) {
3287:       case 1:
3288:         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3289:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3290:         break;
3291:       case 2:
3292:         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3293:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3294:         break;
3295:       case 3:
3296:         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3297:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3298:         break;
3299:       case 4:
3300:         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3301:         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3302:         break;
3303:       default:
3304:         B->ops->mult = MatMult_SeqBAIJ_N;
3305:         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3306:         break;
3307:       }
3308:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3309:       break;
3310:     }
3311:     default:
3312:       B->ops->mult    = MatMult_SeqBAIJ_N;
3313:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3314:       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3315:       break;
3316:     }
3317:   }
3318:   B->ops->sor = MatSOR_SeqBAIJ;
3319:   b->mbs      = mbs;
3320:   b->nbs      = nbs;
3321:   if (!skipallocation) {
3322:     if (!b->imax) {
3323:       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));

3325:       b->free_imax_ilen = PETSC_TRUE;
3326:     }
3327:     /* b->ilen will count nonzeros in each block row so far. */
3328:     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3329:     if (!nnz) {
3330:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3331:       else if (nz < 0) nz = 1;
3332:       nz = PetscMin(nz, nbs);
3333:       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3334:       PetscCall(PetscIntMultError(nz, mbs, &nz));
3335:     } else {
3336:       PetscInt64 nz64 = 0;
3337:       for (i = 0; i < mbs; i++) {
3338:         b->imax[i] = nnz[i];
3339:         nz64 += nnz[i];
3340:       }
3341:       PetscCall(PetscIntCast(nz64, &nz));
3342:     }

3344:     /* allocate the matrix space */
3345:     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3346:     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3347:     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3348:     if (B->structure_only) {
3349:       b->free_a = PETSC_FALSE;
3350:     } else {
3351:       PetscInt nzbs2 = 0;
3352:       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3353:       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3354:       b->free_a = PETSC_TRUE;
3355:       PetscCall(PetscArrayzero(b->a, nzbs2));
3356:     }
3357:     b->free_ij = PETSC_TRUE;
3358:     PetscCall(PetscArrayzero(b->j, nz));

3360:     b->i[0] = 0;
3361:     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3362:   } else {
3363:     b->free_a  = PETSC_FALSE;
3364:     b->free_ij = PETSC_FALSE;
3365:   }

3367:   b->bs2              = bs2;
3368:   b->mbs              = mbs;
3369:   b->nz               = 0;
3370:   b->maxnz            = nz;
3371:   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3372:   B->was_assembled    = PETSC_FALSE;
3373:   B->assembled        = PETSC_FALSE;
3374:   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3375:   PetscFunctionReturn(PETSC_SUCCESS);
3376: }

3378: static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3379: {
3380:   PetscInt     i, m, nz, nz_max = 0, *nnz;
3381:   PetscScalar *values      = NULL;
3382:   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;

3384:   PetscFunctionBegin;
3385:   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3386:   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3387:   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3388:   PetscCall(PetscLayoutSetUp(B->rmap));
3389:   PetscCall(PetscLayoutSetUp(B->cmap));
3390:   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3391:   m = B->rmap->n / bs;

3393:   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3394:   PetscCall(PetscMalloc1(m + 1, &nnz));
3395:   for (i = 0; i < m; i++) {
3396:     nz = ii[i + 1] - ii[i];
3397:     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3398:     nz_max = PetscMax(nz_max, nz);
3399:     nnz[i] = nz;
3400:   }
3401:   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3402:   PetscCall(PetscFree(nnz));

3404:   values = (PetscScalar *)V;
3405:   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3406:   for (i = 0; i < m; i++) {
3407:     PetscInt        ncols = ii[i + 1] - ii[i];
3408:     const PetscInt *icols = jj + ii[i];
3409:     if (bs == 1 || !roworiented) {
3410:       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3411:       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3412:     } else {
3413:       PetscInt j;
3414:       for (j = 0; j < ncols; j++) {
3415:         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3416:         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3417:       }
3418:     }
3419:   }
3420:   if (!V) PetscCall(PetscFree(values));
3421:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3422:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3423:   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3424:   PetscFunctionReturn(PETSC_SUCCESS);
3425: }

3427: /*@C
3428:   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored

3430:   Not Collective

3432:   Input Parameter:
3433: . A - a `MATSEQBAIJ` matrix

3435:   Output Parameter:
3436: . array - pointer to the data

3438:   Level: intermediate

3440: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3441: @*/
3442: PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3443: {
3444:   PetscFunctionBegin;
3445:   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3446:   PetscFunctionReturn(PETSC_SUCCESS);
3447: }

3449: /*@C
3450:   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`

3452:   Not Collective

3454:   Input Parameters:
3455: + A     - a `MATSEQBAIJ` matrix
3456: - array - pointer to the data

3458:   Level: intermediate

3460: .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3461: @*/
3462: PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3463: {
3464:   PetscFunctionBegin;
3465:   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3466:   PetscFunctionReturn(PETSC_SUCCESS);
3467: }

3469: /*MC
3470:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3471:    block sparse compressed row format.

3473:    Options Database Keys:
3474: + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3475: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

3477:    Level: beginner

3479:    Notes:
3480:    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3481:    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

3483:    Run with `-info` to see what version of the matrix-vector product is being used

3485: .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3486: M*/

3488: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);

3490: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3491: {
3492:   PetscMPIInt  size;
3493:   Mat_SeqBAIJ *b;

3495:   PetscFunctionBegin;
3496:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3497:   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");

3499:   PetscCall(PetscNew(&b));
3500:   B->data   = (void *)b;
3501:   B->ops[0] = MatOps_Values;

3503:   b->row          = NULL;
3504:   b->col          = NULL;
3505:   b->icol         = NULL;
3506:   b->reallocs     = 0;
3507:   b->saved_values = NULL;

3509:   b->roworiented        = PETSC_TRUE;
3510:   b->nonew              = 0;
3511:   b->diag               = NULL;
3512:   B->spptr              = NULL;
3513:   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3514:   b->keepnonzeropattern = PETSC_FALSE;

3516:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3517:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3518:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3519:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3520:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3521:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3522:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3523:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3524:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3525:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3526: #if defined(PETSC_HAVE_HYPRE)
3527:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3528: #endif
3529:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3530:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3531:   PetscFunctionReturn(PETSC_SUCCESS);
3532: }

3534: PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3535: {
3536:   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3537:   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;

3539:   PetscFunctionBegin;
3540:   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3541:   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");

3543:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3544:     c->imax           = a->imax;
3545:     c->ilen           = a->ilen;
3546:     c->free_imax_ilen = PETSC_FALSE;
3547:   } else {
3548:     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3549:     for (i = 0; i < mbs; i++) {
3550:       c->imax[i] = a->imax[i];
3551:       c->ilen[i] = a->ilen[i];
3552:     }
3553:     c->free_imax_ilen = PETSC_TRUE;
3554:   }

3556:   /* allocate the matrix space */
3557:   if (mallocmatspace) {
3558:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3559:       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3560:       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3561:       c->free_a       = PETSC_TRUE;
3562:       c->i            = a->i;
3563:       c->j            = a->j;
3564:       c->free_ij      = PETSC_FALSE;
3565:       c->parent       = A;
3566:       C->preallocated = PETSC_TRUE;
3567:       C->assembled    = PETSC_TRUE;

3569:       PetscCall(PetscObjectReference((PetscObject)A));
3570:       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3571:       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3572:     } else {
3573:       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3574:       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3575:       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3576:       c->free_a  = PETSC_TRUE;
3577:       c->free_ij = PETSC_TRUE;

3579:       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3580:       if (mbs > 0) {
3581:         PetscCall(PetscArraycpy(c->j, a->j, nz));
3582:         if (cpvalues == MAT_COPY_VALUES) {
3583:           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3584:         } else {
3585:           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3586:         }
3587:       }
3588:       C->preallocated = PETSC_TRUE;
3589:       C->assembled    = PETSC_TRUE;
3590:     }
3591:   }

3593:   c->roworiented = a->roworiented;
3594:   c->nonew       = a->nonew;

3596:   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3597:   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));

3599:   c->bs2        = a->bs2;
3600:   c->mbs        = a->mbs;
3601:   c->nbs        = a->nbs;
3602:   c->nz         = a->nz;
3603:   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3604:   c->solve_work = NULL;
3605:   c->mult_work  = NULL;
3606:   c->sor_workt  = NULL;
3607:   c->sor_work   = NULL;

3609:   c->compressedrow.use   = a->compressedrow.use;
3610:   c->compressedrow.nrows = a->compressedrow.nrows;
3611:   if (a->compressedrow.use) {
3612:     i = a->compressedrow.nrows;
3613:     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3614:     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3615:     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3616:   } else {
3617:     c->compressedrow.use    = PETSC_FALSE;
3618:     c->compressedrow.i      = NULL;
3619:     c->compressedrow.rindex = NULL;
3620:   }
3621:   c->nonzerorowcnt = a->nonzerorowcnt;
3622:   C->nonzerostate  = A->nonzerostate;

3624:   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3625:   PetscFunctionReturn(PETSC_SUCCESS);
3626: }

3628: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3629: {
3630:   PetscFunctionBegin;
3631:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3632:   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3633:   PetscCall(MatSetType(*B, MATSEQBAIJ));
3634:   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3635:   PetscFunctionReturn(PETSC_SUCCESS);
3636: }

3638: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3639: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3640: {
3641:   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3642:   PetscInt    *rowidxs, *colidxs;
3643:   PetscScalar *matvals;

3645:   PetscFunctionBegin;
3646:   PetscCall(PetscViewerSetUp(viewer));

3648:   /* read matrix header */
3649:   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3650:   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3651:   M  = header[1];
3652:   N  = header[2];
3653:   nz = header[3];
3654:   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3655:   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3656:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");

3658:   /* set block sizes from the viewer's .info file */
3659:   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3660:   /* set local and global sizes if not set already */
3661:   if (mat->rmap->n < 0) mat->rmap->n = M;
3662:   if (mat->cmap->n < 0) mat->cmap->n = N;
3663:   if (mat->rmap->N < 0) mat->rmap->N = M;
3664:   if (mat->cmap->N < 0) mat->cmap->N = N;
3665:   PetscCall(PetscLayoutSetUp(mat->rmap));
3666:   PetscCall(PetscLayoutSetUp(mat->cmap));

3668:   /* check if the matrix sizes are correct */
3669:   PetscCall(MatGetSize(mat, &rows, &cols));
3670:   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3671:   PetscCall(MatGetBlockSize(mat, &bs));
3672:   PetscCall(MatGetLocalSize(mat, &m, &n));
3673:   mbs = m / bs;
3674:   nbs = n / bs;

3676:   /* read in row lengths, column indices and nonzero values */
3677:   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3678:   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3679:   rowidxs[0] = 0;
3680:   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3681:   sum = rowidxs[m];
3682:   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

3684:   /* read in column indices and nonzero values */
3685:   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3686:   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3687:   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));

3689:   {               /* preallocate matrix storage */
3690:     PetscBT   bt; /* helper bit set to count nonzeros */
3691:     PetscInt *nnz;
3692:     PetscBool sbaij;

3694:     PetscCall(PetscBTCreate(nbs, &bt));
3695:     PetscCall(PetscCalloc1(mbs, &nnz));
3696:     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3697:     for (i = 0; i < mbs; i++) {
3698:       PetscCall(PetscBTMemzero(nbs, bt));
3699:       for (k = 0; k < bs; k++) {
3700:         PetscInt row = bs * i + k;
3701:         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3702:           PetscInt col = colidxs[j];
3703:           if (!sbaij || col >= row)
3704:             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3705:         }
3706:       }
3707:     }
3708:     PetscCall(PetscBTDestroy(&bt));
3709:     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3710:     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3711:     PetscCall(PetscFree(nnz));
3712:   }

3714:   /* store matrix values */
3715:   for (i = 0; i < m; i++) {
3716:     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3717:     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3718:   }

3720:   PetscCall(PetscFree(rowidxs));
3721:   PetscCall(PetscFree2(colidxs, matvals));
3722:   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3723:   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3724:   PetscFunctionReturn(PETSC_SUCCESS);
3725: }

3727: PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3728: {
3729:   PetscBool isbinary;

3731:   PetscFunctionBegin;
3732:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3733:   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3734:   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3735:   PetscFunctionReturn(PETSC_SUCCESS);
3736: }

3738: /*@
3739:   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3740:   compressed row) format.  For good matrix assembly performance the
3741:   user should preallocate the matrix storage by setting the parameter `nz`
3742:   (or the array `nnz`).

3744:   Collective

3746:   Input Parameters:
3747: + comm - MPI communicator, set to `PETSC_COMM_SELF`
3748: . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3749:          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3750: . m    - number of rows
3751: . n    - number of columns
3752: . nz   - number of nonzero blocks  per block row (same for all rows)
3753: - nnz  - array containing the number of nonzero blocks in the various block rows
3754:          (possibly different for each block row) or `NULL`

3756:   Output Parameter:
3757: . A - the matrix

3759:   Options Database Keys:
3760: + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3761: - -mat_block_size - size of the blocks to use

3763:   Level: intermediate

3765:   Notes:
3766:   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3767:   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3768:   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

3770:   The number of rows and columns must be divisible by blocksize.

3772:   If the `nnz` parameter is given then the `nz` parameter is ignored

3774:   A nonzero block is any block that as 1 or more nonzeros in it

3776:   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3777:   storage.  That is, the stored row and column indices can begin at
3778:   either one (as in Fortran) or zero.

3780:   Specify the preallocated storage with either `nz` or `nnz` (not both).
3781:   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3782:   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3783:   matrices.

3785: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3786: @*/
3787: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3788: {
3789:   PetscFunctionBegin;
3790:   PetscCall(MatCreate(comm, A));
3791:   PetscCall(MatSetSizes(*A, m, n, m, n));
3792:   PetscCall(MatSetType(*A, MATSEQBAIJ));
3793:   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3794:   PetscFunctionReturn(PETSC_SUCCESS);
3795: }

3797: /*@
3798:   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3799:   per row in the matrix. For good matrix assembly performance the
3800:   user should preallocate the matrix storage by setting the parameter `nz`
3801:   (or the array `nnz`).

3803:   Collective

3805:   Input Parameters:
3806: + B   - the matrix
3807: . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3808:         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3809: . nz  - number of block nonzeros per block row (same for all rows)
3810: - nnz - array containing the number of block nonzeros in the various block rows
3811:         (possibly different for each block row) or `NULL`

3813:   Options Database Keys:
3814: + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3815: - -mat_block_size - size of the blocks to use

3817:   Level: intermediate

3819:   Notes:
3820:   If the `nnz` parameter is given then the `nz` parameter is ignored

3822:   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3823:   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3824:   You can also run with the option `-info` and look for messages with the string
3825:   malloc in them to see if additional memory allocation was needed.

3827:   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3828:   storage.  That is, the stored row and column indices can begin at
3829:   either one (as in Fortran) or zero.

3831:   Specify the preallocated storage with either `nz` or `nnz` (not both).
3832:   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3833:   allocation.  See [Sparse Matrices](sec_matsparse) for details.

3835: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3836: @*/
3837: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3838: {
3839:   PetscFunctionBegin;
3843:   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3844:   PetscFunctionReturn(PETSC_SUCCESS);
3845: }

3847: /*@C
3848:   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values

3850:   Collective

3852:   Input Parameters:
3853: + B  - the matrix
3854: . bs - the blocksize
3855: . i  - the indices into `j` for the start of each local row (indices start with zero)
3856: . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3857: - v  - optional values in the matrix, use `NULL` if not provided

3859:   Level: advanced

3861:   Notes:
3862:   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`

3864:   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3865:   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3866:   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3867:   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3868:   block column and the second index is over columns within a block.

3870:   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

3872: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3873: @*/
3874: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3875: {
3876:   PetscFunctionBegin;
3880:   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3881:   PetscFunctionReturn(PETSC_SUCCESS);
3882: }

3884: /*@
3885:   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.

3887:   Collective

3889:   Input Parameters:
3890: + comm - must be an MPI communicator of size 1
3891: . bs   - size of block
3892: . m    - number of rows
3893: . n    - number of columns
3894: . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3895: . j    - column indices
3896: - a    - matrix values

3898:   Output Parameter:
3899: . mat - the matrix

3901:   Level: advanced

3903:   Notes:
3904:   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3905:   once the matrix is destroyed

3907:   You cannot set new nonzero locations into this matrix, that will generate an error.

3909:   The `i` and `j` indices are 0 based

3911:   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format

3913:   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3914:   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3915:   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3916:   with column-major ordering within blocks.

3918: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3919: @*/
3920: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3921: {
3922:   Mat_SeqBAIJ *baij;

3924:   PetscFunctionBegin;
3925:   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3926:   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");

3928:   PetscCall(MatCreate(comm, mat));
3929:   PetscCall(MatSetSizes(*mat, m, n, m, n));
3930:   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3931:   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3932:   baij = (Mat_SeqBAIJ *)(*mat)->data;
3933:   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));

3935:   baij->i = i;
3936:   baij->j = j;
3937:   baij->a = a;

3939:   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3940:   baij->free_a         = PETSC_FALSE;
3941:   baij->free_ij        = PETSC_FALSE;
3942:   baij->free_imax_ilen = PETSC_TRUE;

3944:   for (PetscInt ii = 0; ii < m; ii++) {
3945:     const PetscInt row_len = i[ii + 1] - i[ii];

3947:     baij->ilen[ii] = baij->imax[ii] = row_len;
3948:     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3949:   }
3950:   if (PetscDefined(USE_DEBUG)) {
3951:     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3952:       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3953:       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3954:     }
3955:   }

3957:   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3958:   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3959:   PetscFunctionReturn(PETSC_SUCCESS);
3960: }

3962: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3963: {
3964:   PetscFunctionBegin;
3965:   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3966:   PetscFunctionReturn(PETSC_SUCCESS);
3967: }