Actual source code: spbas.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/aij/seq/bas/spbas.h>

  4: /*MC
  5:   MATSOLVERBAS -  Provides ICC(k) with drop tolerance

  7:   Works with `MATAIJ`  matrices

  9:   Options Database Key:
 10: . -pc_factor_levels l - number of levels of fill

 12:   Level: intermediate

 14:    Contributed by: Bas van 't Hof

 16:    Note:
 17:     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
 18:      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
 19:      we recommend not using this functionality.

 21: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
 22: M*/

 24: /*
 25:   spbas_memory_requirement:
 26:     Calculate the number of bytes needed to store the matrix
 27: */
 28: size_t spbas_memory_requirement(spbas_matrix matrix)
 29: {
 30:   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
 31:                   sizeof(PetscBool) +                /* block_data */
 32:                   sizeof(PetscScalar **) +           /* values */
 33:                   sizeof(PetscScalar *) +            /* alloc_val */
 34:                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
 35:                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
 36:                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
 37:                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */

 39:   /* icol0[*] */
 40:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);

 42:   /* icols[*][*] */
 43:   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
 44:   else memreq += matrix.nnz * sizeof(PetscInt);

 46:   if (matrix.values) {
 47:     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
 48:     /* values[*][*] */
 49:     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
 50:     else memreq += matrix.nnz * sizeof(PetscScalar);
 51:   }
 52:   return memreq;
 53: }

 55: /*
 56:   spbas_allocate_pattern:
 57:     allocate the pattern arrays row_nnz, icols and optionally values
 58: */
 59: static PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values)
 60: {
 61:   PetscInt nrows        = result->nrows;
 62:   PetscInt col_idx_type = result->col_idx_type;

 64:   PetscFunctionBegin;
 65:   /* Allocate sparseness pattern */
 66:   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
 67:   PetscCall(PetscMalloc1(nrows, &result->icols));

 69:   /* If offsets are given wrt an array, create array */
 70:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
 71:     PetscCall(PetscMalloc1(nrows, &result->icol0));
 72:   } else {
 73:     result->icol0 = NULL;
 74:   }

 76:   /* If values are given, allocate values array */
 77:   if (do_values) {
 78:     PetscCall(PetscMalloc1(nrows, &result->values));
 79:   } else {
 80:     result->values = NULL;
 81:   }
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: /*
 86: spbas_allocate_data:
 87:    in case of block_data:
 88:        Allocate the data arrays alloc_icol and optionally alloc_val,
 89:        set appropriate pointers from icols and values;
 90:    in case of !block_data:
 91:        Allocate the arrays icols[i] and optionally values[i]
 92: */
 93: static PetscErrorCode spbas_allocate_data(spbas_matrix *result)
 94: {
 95:   PetscInt  i;
 96:   PetscInt  nnz   = result->nnz;
 97:   PetscInt  nrows = result->nrows;
 98:   PetscInt  r_nnz;
 99:   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
100:   PetscBool block_data = result->block_data;

102:   PetscFunctionBegin;
103:   if (block_data) {
104:     /* Allocate the column number array and point to it */
105:     result->n_alloc_icol = nnz;

107:     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));

109:     result->icols[0] = result->alloc_icol;
110:     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];

112:     /* Allocate the value array and point to it */
113:     if (do_values) {
114:       result->n_alloc_val = nnz;

116:       PetscCall(PetscMalloc1(nnz, &result->alloc_val));

118:       result->values[0] = result->alloc_val;
119:       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
120:     }
121:   } else {
122:     for (i = 0; i < nrows; i++) {
123:       r_nnz = result->row_nnz[i];
124:       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
125:     }
126:     if (do_values) {
127:       for (i = 0; i < nrows; i++) {
128:         r_nnz = result->row_nnz[i];
129:         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
130:       }
131:     }
132:   }
133:   PetscFunctionReturn(PETSC_SUCCESS);
134: }

136: /*
137:   spbas_row_order_icol
138:      determine if row i1 should come
139:        + before row i2 in the sorted rows (return -1),
140:        + after (return 1)
141:        + is identical (return 0).
142: */
143: static int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type)
144: {
145:   PetscInt  j;
146:   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
147:   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
148:   PetscInt *icol1 = &icol_in[irow_in[i1]];
149:   PetscInt *icol2 = &icol_in[irow_in[i2]];

151:   if (nnz1 < nnz2) return -1;
152:   if (nnz1 > nnz2) return 1;

154:   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
155:     for (j = 0; j < nnz1; j++) {
156:       if (icol1[j] < icol2[j]) return -1;
157:       if (icol1[j] > icol2[j]) return 1;
158:     }
159:   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
160:     for (j = 0; j < nnz1; j++) {
161:       if (icol1[j] - i1 < icol2[j] - i2) return -1;
162:       if (icol1[j] - i1 > icol2[j] - i2) return 1;
163:     }
164:   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
165:     for (j = 1; j < nnz1; j++) {
166:       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
167:       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
168:     }
169:   }
170:   return 0;
171: }

173: /*
174:   spbas_mergesort_icols:
175:     return a sorting of the rows in which identical sparseness patterns are
176:     next to each other
177: */
178: static PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort)
179: {
180:   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
181:   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
182:   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
183:   PetscInt *ialloc;               /* Allocated arrays */
184:   PetscInt *iswap;                /* auxiliary pointers for swapping */
185:   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
186:   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */

188:   PetscFunctionBegin;
189:   PetscCall(PetscMalloc1(nrows, &ialloc));

191:   ihlp1 = ialloc;
192:   ihlp2 = isort;

194:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
195:   for (istep = 1; istep < nrows; istep *= 2) {
196:     /*
197:       Combine sorted parts
198:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
199:       of ihlp2 and vhlp2

201:       into one sorted part
202:           istart:istart+2*istep-1
203:       of ihlp1 and vhlp1
204:     */
205:     for (istart = 0; istart < nrows; istart += 2 * istep) {
206:       /* Set counters and bound array part endings */
207:       i1    = istart;
208:       i1end = i1 + istep;
209:       if (i1end > nrows) i1end = nrows;
210:       i2    = istart + istep;
211:       i2end = i2 + istep;
212:       if (i2end > nrows) i2end = nrows;

214:       /* Merge the two array parts */
215:       for (i = istart; i < i2end; i++) {
216:         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
217:           ihlp1[i] = ihlp2[i1];
218:           i1++;
219:         } else if (i2 < i2end) {
220:           ihlp1[i] = ihlp2[i2];
221:           i2++;
222:         } else {
223:           ihlp1[i] = ihlp2[i1];
224:           i1++;
225:         }
226:       }
227:     }

229:     /* Swap the two array sets */
230:     iswap = ihlp2;
231:     ihlp2 = ihlp1;
232:     ihlp1 = iswap;
233:   }

235:   /* Copy one more time in case the sorted arrays are the temporary ones */
236:   if (ihlp2 != isort) {
237:     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
238:   }
239:   PetscCall(PetscFree(ialloc));
240:   PetscFunctionReturn(PETSC_SUCCESS);
241: }

243: /*
244:   spbas_compress_pattern:
245:      calculate a compressed sparseness pattern for a sparseness pattern
246:      given in compressed row storage. The compressed sparseness pattern may
247:      require (much) less memory.
248: */
249: PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction)
250: {
251:   PetscInt        nnz      = irow_in[nrows];
252:   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
253:   size_t          mem_compressed;
254:   PetscInt       *isort;
255:   PetscInt       *icols;
256:   PetscInt        row_nnz;
257:   PetscInt       *ipoint;
258:   PetscBool      *used;
259:   PetscInt        ptr;
260:   PetscInt        i, j;
261:   const PetscBool no_values = PETSC_FALSE;

263:   PetscFunctionBegin;
264:   /* Allocate the structure of the new matrix */
265:   B->nrows        = nrows;
266:   B->ncols        = ncols;
267:   B->nnz          = nnz;
268:   B->col_idx_type = col_idx_type;
269:   B->block_data   = PETSC_TRUE;

271:   PetscCall(spbas_allocate_pattern(B, no_values));

273:   /* When using an offset array, set it */
274:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
275:     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
276:   }

278:   /* Allocate the ordering for the rows */
279:   PetscCall(PetscMalloc1(nrows, &isort));
280:   PetscCall(PetscMalloc1(nrows, &ipoint));
281:   PetscCall(PetscCalloc1(nrows, &used));

283:   for (i = 0; i < nrows; i++) {
284:     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
285:     isort[i]      = i;
286:     ipoint[i]     = i;
287:   }

289:   /* Sort the rows so that identical columns will be next to each other */
290:   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
291:   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));

293:   /* Replace identical rows with the first one in the list */
294:   for (i = 1; i < nrows; i++) {
295:     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
296:   }

298:   /* Collect the rows which are used*/
299:   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;

301:   /* Calculate needed memory */
302:   B->n_alloc_icol = 0;
303:   for (i = 0; i < nrows; i++) {
304:     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
305:   }
306:   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));

308:   /* Fill in the diagonal offsets for the rows which store their own data */
309:   ptr = 0;
310:   for (i = 0; i < B->nrows; i++) {
311:     if (used[i]) {
312:       B->icols[i] = &B->alloc_icol[ptr];
313:       icols       = &icol_in[irow_in[i]];
314:       row_nnz     = B->row_nnz[i];
315:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
316:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
317:       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
318:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
319:       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
320:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
321:       }
322:       ptr += B->row_nnz[i];
323:     }
324:   }

326:   /* Point to the right places for all data */
327:   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
328:   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
329:   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));

331:   PetscCall(PetscFree(isort));
332:   PetscCall(PetscFree(used));
333:   PetscCall(PetscFree(ipoint));

335:   mem_compressed = spbas_memory_requirement(*B);
336:   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
337:   PetscFunctionReturn(PETSC_SUCCESS);
338: }

340: /*
341:    spbas_incomplete_cholesky
342:        Incomplete Cholesky decomposition
343: */
344: #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>

346: /*
347:   spbas_delete : de-allocate the arrays owned by this matrix
348: */
349: PetscErrorCode spbas_delete(spbas_matrix matrix)
350: {
351:   PetscInt i;

353:   PetscFunctionBegin;
354:   if (matrix.block_data) {
355:     PetscCall(PetscFree(matrix.alloc_icol));
356:     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
357:   } else {
358:     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
359:     PetscCall(PetscFree(matrix.icols));
360:     if (matrix.values) {
361:       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
362:     }
363:   }

365:   PetscCall(PetscFree(matrix.row_nnz));
366:   PetscCall(PetscFree(matrix.icols));
367:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
368:   PetscCall(PetscFree(matrix.values));
369:   PetscFunctionReturn(PETSC_SUCCESS);
370: }

372: /*
373: spbas_matrix_to_crs:
374:    Convert an spbas_matrix to compessed row storage
375: */
376: PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
377: {
378:   PetscInt     nrows = matrix_A.nrows;
379:   PetscInt     nnz   = matrix_A.nnz;
380:   PetscInt     i, j, r_nnz, i0;
381:   PetscInt    *irow;
382:   PetscInt    *icol;
383:   PetscInt    *icol_A;
384:   MatScalar   *val;
385:   PetscScalar *val_A;
386:   PetscInt     col_idx_type = matrix_A.col_idx_type;
387:   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;

389:   PetscFunctionBegin;
390:   PetscCall(PetscMalloc1(nrows + 1, &irow));
391:   PetscCall(PetscMalloc1(nnz, &icol));
392:   *icol_out = icol;
393:   *irow_out = irow;
394:   if (do_values) {
395:     PetscCall(PetscMalloc1(nnz, &val));
396:     *val_out  = val;
397:     *icol_out = icol;
398:     *irow_out = irow;
399:   }

401:   irow[0] = 0;
402:   for (i = 0; i < nrows; i++) {
403:     r_nnz       = matrix_A.row_nnz[i];
404:     i0          = irow[i];
405:     irow[i + 1] = i0 + r_nnz;
406:     icol_A      = matrix_A.icols[i];

408:     if (do_values) {
409:       val_A = matrix_A.values[i];
410:       for (j = 0; j < r_nnz; j++) {
411:         icol[i0 + j] = icol_A[j];
412:         val[i0 + j]  = val_A[j];
413:       }
414:     } else {
415:       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
416:     }

418:     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
419:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
420:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
421:       i0 = matrix_A.icol0[i];
422:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
423:     }
424:   }
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }

428: /*
429:     spbas_transpose
430:        return the transpose of a matrix
431: */
432: PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result)
433: {
434:   PetscInt     col_idx_type = in_matrix.col_idx_type;
435:   PetscInt     nnz          = in_matrix.nnz;
436:   PetscInt     ncols        = in_matrix.nrows;
437:   PetscInt     nrows        = in_matrix.ncols;
438:   PetscInt     i, j, k;
439:   PetscInt     r_nnz;
440:   PetscInt    *irow;
441:   PetscInt     icol0 = 0;
442:   PetscScalar *val;

444:   PetscFunctionBegin;
445:   /* Copy input values */
446:   result->nrows        = nrows;
447:   result->ncols        = ncols;
448:   result->nnz          = nnz;
449:   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
450:   result->block_data   = PETSC_TRUE;

452:   /* Allocate sparseness pattern */
453:   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

455:   /*  Count the number of nonzeros in each row */
456:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

458:   for (i = 0; i < ncols; i++) {
459:     r_nnz = in_matrix.row_nnz[i];
460:     irow  = in_matrix.icols[i];
461:     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
462:       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
463:     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
464:       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
465:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
466:       icol0 = in_matrix.icol0[i];
467:       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
468:     }
469:   }

471:   /* Set the pointers to the data */
472:   PetscCall(spbas_allocate_data(result));

474:   /* Reset the number of nonzeros in each row */
475:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

477:   /* Fill the data arrays */
478:   if (in_matrix.values) {
479:     for (i = 0; i < ncols; i++) {
480:       r_nnz = in_matrix.row_nnz[i];
481:       irow  = in_matrix.icols[i];
482:       val   = in_matrix.values[i];

484:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
485:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
486:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
487:       for (j = 0; j < r_nnz; j++) {
488:         k                                     = icol0 + irow[j];
489:         result->icols[k][result->row_nnz[k]]  = i;
490:         result->values[k][result->row_nnz[k]] = val[j];
491:         result->row_nnz[k]++;
492:       }
493:     }
494:   } else {
495:     for (i = 0; i < ncols; i++) {
496:       r_nnz = in_matrix.row_nnz[i];
497:       irow  = in_matrix.icols[i];

499:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
500:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
501:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];

503:       for (j = 0; j < r_nnz; j++) {
504:         k                                    = icol0 + irow[j];
505:         result->icols[k][result->row_nnz[k]] = i;
506:         result->row_nnz[k]++;
507:       }
508:     }
509:   }
510:   PetscFunctionReturn(PETSC_SUCCESS);
511: }

513: /*
514:    spbas_mergesort

516:       mergesort for an array of integers and an array of associated
517:       reals

519:       on output, icol[0..nnz-1] is increasing;
520:                   val[0..nnz-1] has undergone the same permutation as icol

522:       NB: val may be NULL: in that case, only the integers are sorted

524: */
525: static PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
526: {
527:   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
528:   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
529:   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
530:   PetscInt    *ialloc;               /* Allocated arrays */
531:   PetscScalar *valloc = NULL;
532:   PetscInt    *iswap; /* auxiliary pointers for swapping */
533:   PetscScalar *vswap;
534:   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
535:   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
536:   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
537:   PetscScalar *vhlp2 = NULL;

539:   PetscFunctionBegin;
540:   PetscCall(PetscMalloc1(nnz, &ialloc));
541:   ihlp1 = ialloc;
542:   ihlp2 = icol;

544:   if (val) {
545:     PetscCall(PetscMalloc1(nnz, &valloc));
546:     vhlp1 = valloc;
547:     vhlp2 = val;
548:   }

550:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
551:   for (istep = 1; istep < nnz; istep *= 2) {
552:     /*
553:       Combine sorted parts
554:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
555:       of ihlp2 and vhlp2

557:       into one sorted part
558:           istart:istart+2*istep-1
559:       of ihlp1 and vhlp1
560:     */
561:     for (istart = 0; istart < nnz; istart += 2 * istep) {
562:       /* Set counters and bound array part endings */
563:       i1    = istart;
564:       i1end = i1 + istep;
565:       if (i1end > nnz) i1end = nnz;
566:       i2    = istart + istep;
567:       i2end = i2 + istep;
568:       if (i2end > nnz) i2end = nnz;

570:       /* Merge the two array parts */
571:       if (val) {
572:         for (i = istart; i < i2end; i++) {
573:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
574:             ihlp1[i] = ihlp2[i1];
575:             vhlp1[i] = vhlp2[i1];
576:             i1++;
577:           } else if (i2 < i2end) {
578:             ihlp1[i] = ihlp2[i2];
579:             vhlp1[i] = vhlp2[i2];
580:             i2++;
581:           } else {
582:             ihlp1[i] = ihlp2[i1];
583:             vhlp1[i] = vhlp2[i1];
584:             i1++;
585:           }
586:         }
587:       } else {
588:         for (i = istart; i < i2end; i++) {
589:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
590:             ihlp1[i] = ihlp2[i1];
591:             i1++;
592:           } else if (i2 < i2end) {
593:             ihlp1[i] = ihlp2[i2];
594:             i2++;
595:           } else {
596:             ihlp1[i] = ihlp2[i1];
597:             i1++;
598:           }
599:         }
600:       }
601:     }

603:     /* Swap the two array sets */
604:     iswap = ihlp2;
605:     ihlp2 = ihlp1;
606:     ihlp1 = iswap;
607:     vswap = vhlp2;
608:     vhlp2 = vhlp1;
609:     vhlp1 = vswap;
610:   }

612:   /* Copy one more time in case the sorted arrays are the temporary ones */
613:   if (ihlp2 != icol) {
614:     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
615:     if (val) {
616:       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
617:     }
618:   }

620:   PetscCall(PetscFree(ialloc));
621:   if (val) PetscCall(PetscFree(valloc));
622:   PetscFunctionReturn(PETSC_SUCCESS);
623: }

625: /*
626:   spbas_apply_reordering_rows:
627:     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
628: */
629: static PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
630: {
631:   PetscInt      i, j, ip;
632:   PetscInt      nrows = matrix_A->nrows;
633:   PetscInt     *row_nnz;
634:   PetscInt    **icols;
635:   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
636:   PetscScalar **vals      = NULL;

638:   PetscFunctionBegin;
639:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

641:   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
642:   PetscCall(PetscMalloc1(nrows, &row_nnz));
643:   PetscCall(PetscMalloc1(nrows, &icols));

645:   for (i = 0; i < nrows; i++) {
646:     ip = permutation[i];
647:     if (do_values) vals[i] = matrix_A->values[ip];
648:     icols[i]   = matrix_A->icols[ip];
649:     row_nnz[i] = matrix_A->row_nnz[ip];
650:     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
651:   }

653:   if (do_values) PetscCall(PetscFree(matrix_A->values));
654:   PetscCall(PetscFree(matrix_A->icols));
655:   PetscCall(PetscFree(matrix_A->row_nnz));

657:   if (do_values) matrix_A->values = vals;
658:   matrix_A->icols   = icols;
659:   matrix_A->row_nnz = row_nnz;
660:   PetscFunctionReturn(PETSC_SUCCESS);
661: }

663: /*
664:   spbas_apply_reordering_cols:
665:     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
666: */
667: static PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation)
668: {
669:   PetscInt     i, j;
670:   PetscInt     nrows = matrix_A->nrows;
671:   PetscInt     row_nnz;
672:   PetscInt    *icols;
673:   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
674:   PetscScalar *vals      = NULL;

676:   PetscFunctionBegin;
677:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

679:   for (i = 0; i < nrows; i++) {
680:     icols   = matrix_A->icols[i];
681:     row_nnz = matrix_A->row_nnz[i];
682:     if (do_values) vals = matrix_A->values[i];

684:     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
685:     PetscCall(spbas_mergesort(row_nnz, icols, vals));
686:   }
687:   PetscFunctionReturn(PETSC_SUCCESS);
688: }

690: /*
691:   spbas_apply_reordering:
692:     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
693: */
694: PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm)
695: {
696:   PetscFunctionBegin;
697:   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
698:   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
699:   PetscFunctionReturn(PETSC_SUCCESS);
700: }

702: PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result)
703: {
704:   spbas_matrix retval;
705:   PetscInt     i, j, i0, r_nnz;

707:   PetscFunctionBegin;
708:   /* Copy input values */
709:   retval.nrows = nrows;
710:   retval.ncols = ncols;
711:   retval.nnz   = ai[nrows];

713:   retval.block_data   = PETSC_TRUE;
714:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;

716:   /* Allocate output matrix */
717:   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
718:   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
719:   PetscCall(spbas_allocate_data(&retval));
720:   /* Copy the structure */
721:   for (i = 0; i < retval.nrows; i++) {
722:     i0    = ai[i];
723:     r_nnz = ai[i + 1] - i0;

725:     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
726:   }
727:   *result = retval;
728:   PetscFunctionReturn(PETSC_SUCCESS);
729: }

731: /*
732:    spbas_mark_row_power:
733:       Mark the columns in row 'row' which are nonzero in
734:           matrix^2log(marker).
735: */
736: static PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
737:                                            PetscInt      row,       /* row for which the columns are marked */
738:                                            spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
739:                                            PetscInt      marker,    /* marker-value: 2^power */
740:                                            PetscInt      minmrk,    /* lower bound for marked points */
741:                                            PetscInt      maxmrk)    /* upper bound for marked points */
742: {
743:   PetscInt i, j, nnz;

745:   PetscFunctionBegin;
746:   nnz = in_matrix->row_nnz[row];

748:   /* For higher powers, call this function recursively */
749:   if (marker > 1) {
750:     for (i = 0; i < nnz; i++) {
751:       j = row + in_matrix->icols[row][i];
752:       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
753:         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
754:         iwork[j] |= marker;
755:       }
756:     }
757:   } else {
758:     /*  Mark the columns reached */
759:     for (i = 0; i < nnz; i++) {
760:       j = row + in_matrix->icols[row][i];
761:       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
762:     }
763:   }
764:   PetscFunctionReturn(PETSC_SUCCESS);
765: }

767: /*
768:    spbas_power
769:       Calculate sparseness patterns for incomplete Cholesky decompositions
770:       of a given order: (almost) all nonzeros of the matrix^(order+1) which
771:       are inside the band width are found and stored in the output sparseness
772:       pattern.
773: */
774: PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result)
775: {
776:   spbas_matrix retval;
777:   PetscInt     nrows = in_matrix.nrows;
778:   PetscInt     ncols = in_matrix.ncols;
779:   PetscInt     i, j, kend;
780:   PetscInt     nnz, inz;
781:   PetscInt    *iwork;
782:   PetscInt     marker;
783:   PetscInt     maxmrk = 0;

785:   PetscFunctionBegin;
786:   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
787:   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
788:   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
789:   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");

791:   /* Copy input values*/
792:   retval.nrows        = ncols;
793:   retval.ncols        = nrows;
794:   retval.nnz          = 0;
795:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
796:   retval.block_data   = PETSC_FALSE;

798:   /* Allocate sparseness pattern */
799:   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

801:   /* Allocate marker array: note sure the max needed so use the max of the two */
802:   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));

804:   /* Calculate marker values */
805:   marker = 1;
806:   for (i = 1; i < power; i++) marker *= 2;

808:   for (i = 0; i < nrows; i++) {
809:     /* Calculate the pattern for each row */

811:     nnz  = in_matrix.row_nnz[i];
812:     kend = i + in_matrix.icols[i][nnz - 1];
813:     if (maxmrk <= kend) maxmrk = kend + 1;
814:     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));

816:     /* Count the columns*/
817:     nnz = 0;
818:     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);

820:     /* Allocate the column indices */
821:     retval.row_nnz[i] = nnz;
822:     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));

824:     /* Administrate the column indices */
825:     inz = 0;
826:     for (j = i; j < maxmrk; j++) {
827:       if (iwork[j]) {
828:         retval.icols[i][inz] = j - i;
829:         inz++;
830:         iwork[j] = 0;
831:       }
832:     }
833:     retval.nnz += nnz;
834:   }
835:   PetscCall(PetscFree(iwork));
836:   *result = retval;
837:   PetscFunctionReturn(PETSC_SUCCESS);
838: }

840: /*
841:    spbas_keep_upper:
842:       remove the lower part of the matrix: keep the upper part
843: */
844: PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix)
845: {
846:   PetscInt i, j;
847:   PetscInt jstart;

849:   PetscFunctionBegin;
850:   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
851:   for (i = 0; i < inout_matrix->nrows; i++) {
852:     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
853:     if (jstart > 0) {
854:       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];

856:       if (inout_matrix->values) {
857:         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
858:       }

860:       inout_matrix->row_nnz[i] -= jstart;

862:       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));

864:       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
865:       inout_matrix->nnz -= jstart;
866:     }
867:   }
868:   PetscFunctionReturn(PETSC_SUCCESS);
869: }