Actual source code: fdaij.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sell/seq/sell.h>
  4: #include <petsc/private/isimpl.h>

  6: /*
  7:     This routine is shared by SeqAIJ and SeqBAIJ matrices,
  8:     since it operators only on the nonzero structure of the elements or blocks.
  9: */
 10: PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
 11: {
 12:   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
 13:   PetscBool isBAIJ, isSELL;

 15:   PetscFunctionBegin;
 16:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
 17:   PetscCall(MatGetBlockSize(mat, &bs));
 18:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
 19:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
 20:   if (isBAIJ) {
 21:     c->brows = m;
 22:     c->bcols = 1;
 23:   } else { /* seqaij matrix */
 24:     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
 25:     PetscReal mem;
 26:     PetscInt  nz, brows, bcols;
 27:     if (isSELL) {
 28:       Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
 29:       nz               = spA->nz;
 30:     } else {
 31:       Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
 32:       nz              = spA->nz;
 33:     }

 35:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
 36:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
 37:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
 38:     if (!bcols) bcols = 1;
 39:     brows = 1000 / bcols;
 40:     if (bcols > nis) bcols = nis;
 41:     if (brows == 0 || brows > m) brows = m;
 42:     c->brows = brows;
 43:     c->bcols = bcols;
 44:   }

 46:   c->M       = mat->rmap->N / bs; /* set total rows, columns and local rows */
 47:   c->N       = mat->cmap->N / bs;
 48:   c->m       = mat->rmap->N / bs;
 49:   c->rstart  = 0;
 50:   c->ncolors = nis;
 51:   c->ctype   = iscoloring->ctype;
 52:   PetscFunctionReturn(PETSC_SUCCESS);
 53: }

 55: /*
 56:  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
 57:    Input Parameters:
 58: +  mat - the matrix containing the nonzero structure of the Jacobian
 59: .  color - the coloring context
 60: -  nz - number of local non-zeros in mat
 61: */
 62: PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz)
 63: {
 64:   PetscInt  i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors;
 65:   PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end;

 67:   PetscFunctionBegin;
 68:   if (brows < 1 || brows > mbs) brows = mbs;
 69:   PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start));
 70:   PetscCall(PetscCalloc1(nis, &nrows_new));
 71:   PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy));

 73:   nz_new             = 0;
 74:   nbcols             = 0;
 75:   color_start[bcols] = 0;

 77:   if (c->htype[0] == 'd') { /*  c->htype == 'ds', use MatEntry */
 78:     MatEntry *Jentry_new, *Jentry = c->matentry;

 80:     PetscCall(PetscMalloc1(nz, &Jentry_new));
 81:     for (i = 0; i < nis; i += bcols) { /* loop over colors */
 82:       if (i + bcols > nis) {
 83:         color_start[nis - i] = color_start[bcols];
 84:         bcols                = nis - i;
 85:       }

 87:       color_start[0] = color_start[bcols];
 88:       for (j = 0; j < bcols; j++) {
 89:         color_start[j + 1] = c->nrows[i + j] + color_start[j];
 90:         row_start[j]       = 0;
 91:       }

 93:       row_end = brows;
 94:       if (row_end > mbs) row_end = mbs;

 96:       while (row_end <= mbs) {        /* loop over block rows */
 97:         for (j = 0; j < bcols; j++) { /* loop over block columns */
 98:           nrows = c->nrows[i + j];
 99:           nz    = color_start[j];
100:           while (row_start[j] < nrows) {
101:             if (Jentry[nz].row >= row_end) {
102:               color_start[j] = nz;
103:               break;
104:             } else {                                                 /* copy Jentry[nz] to Jentry_new[nz_new] */
105:               Jentry_new[nz_new].row     = Jentry[nz].row + j * mbs; /* index in dy-array */
106:               Jentry_new[nz_new].col     = Jentry[nz].col;
107:               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
108:               nz_new++;
109:               nz++;
110:               row_start[j]++;
111:             }
112:           }
113:         }
114:         if (row_end == mbs) break;
115:         row_end += brows;
116:         if (row_end > mbs) row_end = mbs;
117:       }
118:       nrows_new[nbcols++] = nz_new;
119:     }
120:     PetscCall(PetscFree(Jentry));
121:     c->matentry = Jentry_new;
122:   } else { /*  c->htype == 'wp', use MatEntry2 */
123:     MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2;

125:     PetscCall(PetscMalloc1(nz, &Jentry2_new));
126:     for (i = 0; i < nis; i += bcols) { /* loop over colors */
127:       if (i + bcols > nis) {
128:         color_start[nis - i] = color_start[bcols];
129:         bcols                = nis - i;
130:       }

132:       color_start[0] = color_start[bcols];
133:       for (j = 0; j < bcols; j++) {
134:         color_start[j + 1] = c->nrows[i + j] + color_start[j];
135:         row_start[j]       = 0;
136:       }

138:       row_end = brows;
139:       if (row_end > mbs) row_end = mbs;

141:       while (row_end <= mbs) {        /* loop over block rows */
142:         for (j = 0; j < bcols; j++) { /* loop over block columns */
143:           nrows = c->nrows[i + j];
144:           nz    = color_start[j];
145:           while (row_start[j] < nrows) {
146:             if (Jentry2[nz].row >= row_end) {
147:               color_start[j] = nz;
148:               break;
149:             } else {                                                   /* copy Jentry2[nz] to Jentry2_new[nz_new] */
150:               Jentry2_new[nz_new].row     = Jentry2[nz].row + j * mbs; /* index in dy-array */
151:               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
152:               nz_new++;
153:               nz++;
154:               row_start[j]++;
155:             }
156:           }
157:         }
158:         if (row_end == mbs) break;
159:         row_end += brows;
160:         if (row_end > mbs) row_end = mbs;
161:       }
162:       nrows_new[nbcols++] = nz_new;
163:     }
164:     PetscCall(PetscFree(Jentry2));
165:     c->matentry2 = Jentry2_new;
166:   } /* ---------------------------------------------*/

168:   PetscCall(PetscFree2(color_start, row_start));

170:   for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1];
171:   PetscCall(PetscFree(c->nrows));
172:   c->nrows = nrows_new;
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
177: {
178:   PetscInt           i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp;
179:   const PetscInt    *is, *row, *ci, *cj;
180:   PetscBool          isBAIJ, isSELL;
181:   const PetscScalar *A_val;
182:   PetscScalar      **valaddrhit;
183:   MatEntry          *Jentry;
184:   MatEntry2         *Jentry2;

186:   PetscFunctionBegin;
187:   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));

189:   PetscCall(MatGetBlockSize(mat, &bs));
190:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
191:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
192:   if (isBAIJ) {
193:     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data;

195:     A_val = spA->a;
196:     nz    = spA->nz;
197:   } else if (isSELL) {
198:     Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;

200:     A_val = spA->val;
201:     nz    = spA->nz;
202:     bs    = 1; /* only bs=1 is supported for SeqSELL matrix */
203:   } else {
204:     Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;

206:     A_val = spA->a;
207:     nz    = spA->nz;
208:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
209:   }

211:   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
212:   PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freed separately from ncolumns and columns */

214:   if (c->htype[0] == 'd') {
215:     PetscCall(PetscMalloc1(nz, &Jentry));
216:     c->matentry = Jentry;
217:   } else if (c->htype[0] == 'w') {
218:     PetscCall(PetscMalloc1(nz, &Jentry2));
219:     c->matentry2 = Jentry2;
220:   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");

222:   if (isBAIJ) {
223:     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
224:   } else if (isSELL) {
225:     PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
226:   } else {
227:     PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
228:   }

230:   PetscCall(PetscCalloc1(c->m, &rowhit));
231:   PetscCall(PetscMalloc1(c->m, &valaddrhit));

233:   nz = 0;
234:   for (i = 0; i < nis; i++) { /* loop over colors */
235:     PetscCall(ISGetLocalSize(c->isa[i], &n));
236:     PetscCall(ISGetIndices(c->isa[i], &is));

238:     c->ncolumns[i] = n;
239:     c->columns[i]  = (PetscInt *)is;
240:     /* note: we know that c->isa is going to be around as long at the c->columns values */
241:     PetscCall(ISRestoreIndices(c->isa[i], &is));

243:     /* fast, crude version requires O(N*N) work */
244:     bs2   = bs * bs;
245:     nrows = 0;
246:     for (j = 0; j < n; j++) { /* loop over columns */
247:       col = is[j];
248:       tmp = ci[col];
249:       row = cj + tmp;
250:       m   = ci[col + 1] - tmp;
251:       nrows += m;
252:       for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
253:         rowhit[*row]       = col + 1;
254:         valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]];
255:       }
256:     }
257:     c->nrows[i] = nrows; /* total num of rows for this color */

259:     if (c->htype[0] == 'd') {
260:       for (j = 0; j < mbs; j++) { /* loop over rows */
261:         if (rowhit[j]) {
262:           Jentry[nz].row     = j;             /* local row index */
263:           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
264:           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
265:           nz++;
266:           rowhit[j] = 0.0; /* zero rowhit for reuse */
267:         }
268:       }
269:     } else {                      /* c->htype == 'wp' */
270:       for (j = 0; j < mbs; j++) { /* loop over rows */
271:         if (rowhit[j]) {
272:           Jentry2[nz].row     = j;             /* local row index */
273:           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
274:           nz++;
275:           rowhit[j] = 0.0; /* zero rowhit for reuse */
276:         }
277:       }
278:     }
279:   }

281:   if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
282:     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
283:   }

285:   if (isBAIJ) {
286:     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
287:     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
288:   } else if (isSELL) {
289:     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
290:   } else {
291:     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
292:   }
293:   PetscCall(PetscFree(rowhit));
294:   PetscCall(PetscFree(valaddrhit));
295:   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));

297:   PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale));
298:   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
299:   PetscFunctionReturn(PETSC_SUCCESS);
300: }