Actual source code: baijfact3.c

  1: /*
  2:     Factorization code for BAIJ format.
  3: */
  4: #include <../src/mat/impls/baij/seq/baij.h>
  5: #include <petsc/private/kernels/blockinvert.h>

  7: /*
  8:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
  9: */
 10: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
 11: {
 12:   PetscFunctionBegin;
 13:   if (natural) {
 14:     switch (fact->rmap->bs) {
 15:     case 1:
 16:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 17:       break;
 18:     case 2:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 20:       break;
 21:     case 3:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 23:       break;
 24:     case 4:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 26:       break;
 27:     case 5:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 29:       break;
 30:     case 6:
 31:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 32:       break;
 33:     case 7:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 35:       break;
 36:     case 9:
 37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 38:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
 39: #else
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 41: #endif
 42:       break;
 43:     case 15:
 44:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 45:       break;
 46:     default:
 47:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 48:       break;
 49:     }
 50:   } else {
 51:     switch (fact->rmap->bs) {
 52:     case 1:
 53:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 54:       break;
 55:     case 2:
 56:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 57:       break;
 58:     case 3:
 59:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 60:       break;
 61:     case 4:
 62:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 63:       break;
 64:     case 5:
 65:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 66:       break;
 67:     case 6:
 68:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 69:       break;
 70:     case 7:
 71:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 72:       break;
 73:     default:
 74:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 75:       break;
 76:     }
 77:   }
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
 82: {
 83:   PetscFunctionBegin;
 84:   if (natural) {
 85:     switch (inA->rmap->bs) {
 86:     case 1:
 87:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 88:       break;
 89:     case 2:
 90:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 91:       break;
 92:     case 3:
 93:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 94:       break;
 95:     case 4:
 96:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
 97:       break;
 98:     case 5:
 99:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
100:       break;
101:     case 6:
102:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
103:       break;
104:     case 7:
105:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
106:       break;
107:     default:
108:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
109:       break;
110:     }
111:   } else {
112:     switch (inA->rmap->bs) {
113:     case 1:
114:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
115:       break;
116:     case 2:
117:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
118:       break;
119:     case 3:
120:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
121:       break;
122:     case 4:
123:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
124:       break;
125:     case 5:
126:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
127:       break;
128:     case 6:
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
130:       break;
131:     case 7:
132:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
133:       break;
134:     default:
135:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
136:       break;
137:     }
138:   }
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: /*
143:     The symbolic factorization code is identical to that for AIJ format,
144:   except for very small changes since this is now a SeqBAIJ datastructure.
145:   NOT good code reuse.
146: */
147: #include <petscbt.h>
148: #include <../src/mat/utils/freespace.h>

150: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
151: {
152:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
153:   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
154:   PetscBool          row_identity, col_identity, both_identity;
155:   IS                 isicol;
156:   const PetscInt    *r, *ic;
157:   PetscInt           i, *ai = a->i, *aj = a->j;
158:   PetscInt          *bi, *bj, *ajtmp;
159:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
160:   PetscReal          f;
161:   PetscInt           nlnk, *lnk, k, **bi_ptr;
162:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
163:   PetscBT            lnkbt;
164:   PetscBool          missing;

166:   PetscFunctionBegin;
167:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
168:   PetscCall(MatMissingDiagonal(A, &missing, &i));
169:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

171:   if (bs > 1) { /* check shifttype */
172:     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
173:   }

175:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
176:   PetscCall(ISGetIndices(isrow, &r));
177:   PetscCall(ISGetIndices(isicol, &ic));

179:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
180:   PetscCall(PetscMalloc1(n + 1, &bi));
181:   PetscCall(PetscMalloc1(n + 1, &bdiag));
182:   bi[0] = bdiag[0] = 0;

184:   /* linked list for storing column indices of the active row */
185:   nlnk = n + 1;
186:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

188:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

190:   /* initial FreeSpace size is f*(ai[n]+1) */
191:   f = info->fill;
192:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));

194:   current_space = free_space;

196:   for (i = 0; i < n; i++) {
197:     /* copy previous fill into linked list */
198:     nzi   = 0;
199:     nnz   = ai[r[i] + 1] - ai[r[i]];
200:     ajtmp = aj + ai[r[i]];
201:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
202:     nzi += nlnk;

204:     /* add pivot rows into linked list */
205:     row = lnk[n];
206:     while (row < i) {
207:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
208:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
209:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
210:       nzi += nlnk;
211:       row = lnk[row];
212:     }
213:     bi[i + 1] = bi[i] + nzi;
214:     im[i]     = nzi;

216:     /* mark bdiag */
217:     nzbd = 0;
218:     nnz  = nzi;
219:     k    = lnk[n];
220:     while (nnz-- && k < i) {
221:       nzbd++;
222:       k = lnk[k];
223:     }
224:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

226:     /* if free space is not available, make more free space */
227:     if (current_space->local_remaining < nzi) {
228:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
229:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
230:       reallocs++;
231:     }

233:     /* copy data into free space, then initialize lnk */
234:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

236:     bi_ptr[i] = current_space->array;
237:     current_space->array += nzi;
238:     current_space->local_used += nzi;
239:     current_space->local_remaining -= nzi;
240:   }

242:   PetscCall(ISRestoreIndices(isrow, &r));
243:   PetscCall(ISRestoreIndices(isicol, &ic));

245:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
246:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
247:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
248:   PetscCall(PetscLLDestroy(lnk, lnkbt));
249:   PetscCall(PetscFree2(bi_ptr, im));

251:   /* put together the new matrix */
252:   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
253:   b = (Mat_SeqBAIJ *)B->data;

255:   b->free_ij = PETSC_TRUE;
256:   PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
257:   b->free_a        = PETSC_TRUE;
258:   b->j             = bj;
259:   b->i             = bi;
260:   b->diag          = bdiag;
261:   b->free_diag     = PETSC_TRUE;
262:   b->ilen          = NULL;
263:   b->imax          = NULL;
264:   b->row           = isrow;
265:   b->col           = iscol;
266:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

268:   PetscCall(PetscObjectReference((PetscObject)isrow));
269:   PetscCall(PetscObjectReference((PetscObject)iscol));
270:   b->icol = isicol;
271:   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));

273:   b->maxnz = b->nz = bdiag[0] + 1;

275:   B->factortype            = MAT_FACTOR_LU;
276:   B->info.factor_mallocs   = reallocs;
277:   B->info.fill_ratio_given = f;

279:   if (ai[n] != 0) {
280:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
281:   } else {
282:     B->info.fill_ratio_needed = 0.0;
283:   }
284: #if defined(PETSC_USE_INFO)
285:   if (ai[n] != 0) {
286:     PetscReal af = B->info.fill_ratio_needed;
287:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
288:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
289:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
290:     PetscCall(PetscInfo(A, "for best performance.\n"));
291:   } else {
292:     PetscCall(PetscInfo(A, "Empty matrix\n"));
293:   }
294: #endif

296:   PetscCall(ISIdentity(isrow, &row_identity));
297:   PetscCall(ISIdentity(iscol, &col_identity));

299:   both_identity = (PetscBool)(row_identity && col_identity);

301:   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
302:   PetscFunctionReturn(PETSC_SUCCESS);
303: }

305: #if 0
306: // unused
307: static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
308: {
309:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
310:   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
311:   PetscBool          row_identity, col_identity, both_identity;
312:   IS                 isicol;
313:   const PetscInt    *r, *ic;
314:   PetscInt           i, *ai = a->i, *aj = a->j;
315:   PetscInt          *bi, *bj, *ajtmp;
316:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
317:   PetscReal          f;
318:   PetscInt           nlnk, *lnk, k, **bi_ptr;
319:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
320:   PetscBT            lnkbt;
321:   PetscBool          missing;

323:   PetscFunctionBegin;
324:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
325:   PetscCall(MatMissingDiagonal(A, &missing, &i));
326:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

328:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
329:   PetscCall(ISGetIndices(isrow, &r));
330:   PetscCall(ISGetIndices(isicol, &ic));

332:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
333:   PetscCall(PetscMalloc1(n + 1, &bi));
334:   PetscCall(PetscMalloc1(n + 1, &bdiag));

336:   bi[0] = bdiag[0] = 0;

338:   /* linked list for storing column indices of the active row */
339:   nlnk = n + 1;
340:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

342:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

344:   /* initial FreeSpace size is f*(ai[n]+1) */
345:   f = info->fill;
346:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
347:   current_space = free_space;

349:   for (i = 0; i < n; i++) {
350:     /* copy previous fill into linked list */
351:     nzi   = 0;
352:     nnz   = ai[r[i] + 1] - ai[r[i]];
353:     ajtmp = aj + ai[r[i]];
354:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
355:     nzi += nlnk;

357:     /* add pivot rows into linked list */
358:     row = lnk[n];
359:     while (row < i) {
360:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
361:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
362:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
363:       nzi += nlnk;
364:       row = lnk[row];
365:     }
366:     bi[i + 1] = bi[i] + nzi;
367:     im[i]     = nzi;

369:     /* mark bdiag */
370:     nzbd = 0;
371:     nnz  = nzi;
372:     k    = lnk[n];
373:     while (nnz-- && k < i) {
374:       nzbd++;
375:       k = lnk[k];
376:     }
377:     bdiag[i] = bi[i] + nzbd;

379:     /* if free space is not available, make more free space */
380:     if (current_space->local_remaining < nzi) {
381:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
382:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
383:       reallocs++;
384:     }

386:     /* copy data into free space, then initialize lnk */
387:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

389:     bi_ptr[i] = current_space->array;
390:     current_space->array += nzi;
391:     current_space->local_used += nzi;
392:     current_space->local_remaining -= nzi;
393:   }
394:   #if defined(PETSC_USE_INFO)
395:   if (ai[n] != 0) {
396:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
397:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
398:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
399:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
400:     PetscCall(PetscInfo(A, "for best performance.\n"));
401:   } else {
402:     PetscCall(PetscInfo(A, "Empty matrix\n"));
403:   }
404:   #endif

406:   PetscCall(ISRestoreIndices(isrow, &r));
407:   PetscCall(ISRestoreIndices(isicol, &ic));

409:   /* destroy list of free space and other temporary array(s) */
410:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
411:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
412:   PetscCall(PetscLLDestroy(lnk, lnkbt));
413:   PetscCall(PetscFree2(bi_ptr, im));

415:   /* put together the new matrix */
416:   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
417:   b = (Mat_SeqBAIJ *)B->data;
418:   b->free_ij      = PETSC_TRUE;
419:   PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a));
420:   b->free_a       = PETSC_TRUE;
421:   b->j             = bj;
422:   b->i             = bi;
423:   b->diag          = bdiag;
424:   b->free_diag     = PETSC_TRUE;
425:   b->ilen          = NULL;
426:   b->imax          = NULL;
427:   b->row           = isrow;
428:   b->col           = iscol;
429:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

431:   PetscCall(PetscObjectReference((PetscObject)isrow));
432:   PetscCall(PetscObjectReference((PetscObject)iscol));
433:   b->icol = isicol;

435:   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));

437:   b->maxnz = b->nz = bi[n];

439:   B->factortype            = MAT_FACTOR_LU;
440:   B->info.factor_mallocs   = reallocs;
441:   B->info.fill_ratio_given = f;

443:   if (ai[n] != 0) {
444:     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
445:   } else {
446:     B->info.fill_ratio_needed = 0.0;
447:   }

449:   PetscCall(ISIdentity(isrow, &row_identity));
450:   PetscCall(ISIdentity(iscol, &col_identity));

452:   both_identity = (PetscBool)(row_identity && col_identity);

454:   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
455:   PetscFunctionReturn(PETSC_SUCCESS);
456: }
457: #endif