Actual source code: spbas.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/aij/seq/bas/spbas.h>

  4: /*MC
  5:   MATSOLVERBAS -  Provides ICC(k) with drop tolerance

  7:   Works with `MATAIJ`  matrices

  9:   Options Database Key:
 10: . -pc_factor_levels l - number of levels of fill

 12:   Level: intermediate

 14:    Contributed by: Bas van 't Hof

 16:    Note:
 17:     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
 18:      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
 19:      we recommend not using this functionality.

 21: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
 22: M*/

 24: /*
 25:   spbas_memory_requirement:
 26:     Calculate the number of bytes needed to store the matrix
 27: */
 28: size_t spbas_memory_requirement(spbas_matrix matrix)
 29: {
 30:   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
 31:                   sizeof(PetscBool) +                /* block_data */
 32:                   sizeof(PetscScalar **) +           /* values */
 33:                   sizeof(PetscScalar *) +            /* alloc_val */
 34:                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
 35:                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
 36:                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
 37:                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */

 39:   /* icol0[*] */
 40:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);

 42:   /* icols[*][*] */
 43:   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
 44:   else memreq += matrix.nnz * sizeof(PetscInt);

 46:   if (matrix.values) {
 47:     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
 48:     /* values[*][*] */
 49:     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
 50:     else memreq += matrix.nnz * sizeof(PetscScalar);
 51:   }
 52:   return memreq;
 53: }

 55: /*
 56:   spbas_allocate_pattern:
 57:     allocate the pattern arrays row_nnz, icols and optionally values
 58: */
 59: static PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values)
 60: {
 61:   PetscInt nrows        = result->nrows;
 62:   PetscInt col_idx_type = result->col_idx_type;

 64:   PetscFunctionBegin;
 65:   /* Allocate sparseness pattern */
 66:   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
 67:   PetscCall(PetscMalloc1(nrows, &result->icols));

 69:   /* If offsets are given wrt an array, create array */
 70:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
 71:     PetscCall(PetscMalloc1(nrows, &result->icol0));
 72:   } else {
 73:     result->icol0 = NULL;
 74:   }

 76:   /* If values are given, allocate values array */
 77:   if (do_values) {
 78:     PetscCall(PetscMalloc1(nrows, &result->values));
 79:   } else {
 80:     result->values = NULL;
 81:   }
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: /*
 86: spbas_allocate_data:
 87:    in case of block_data:
 88:        Allocate the data arrays alloc_icol and optionally alloc_val,
 89:        set appropriate pointers from icols and values;
 90:    in case of !block_data:
 91:        Allocate the arrays icols[i] and optionally values[i]
 92: */
 93: static PetscErrorCode spbas_allocate_data(spbas_matrix *result)
 94: {
 95:   PetscInt  i;
 96:   PetscInt  nnz   = result->nnz;
 97:   PetscInt  nrows = result->nrows;
 98:   PetscInt  r_nnz;
 99:   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
100:   PetscBool block_data = result->block_data;

102:   PetscFunctionBegin;
103:   if (block_data) {
104:     /* Allocate the column number array and point to it */
105:     result->n_alloc_icol = nnz;

107:     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));

109:     result->icols[0] = result->alloc_icol;
110:     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];

112:     /* Allocate the value array and point to it */
113:     if (do_values) {
114:       result->n_alloc_val = nnz;

116:       PetscCall(PetscMalloc1(nnz, &result->alloc_val));

118:       result->values[0] = result->alloc_val;
119:       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
120:     }
121:   } else {
122:     for (i = 0; i < nrows; i++) {
123:       r_nnz = result->row_nnz[i];
124:       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
125:     }
126:     if (do_values) {
127:       for (i = 0; i < nrows; i++) {
128:         r_nnz = result->row_nnz[i];
129:         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
130:       }
131:     }
132:   }
133:   PetscFunctionReturn(PETSC_SUCCESS);
134: }

136: /*
137:   spbas_row_order_icol
138:      determine if row i1 should come
139:        + before row i2 in the sorted rows (return -1),
140:        + after (return 1)
141:        + is identical (return 0).
142: */
143: static int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type)
144: {
145:   PetscInt  j;
146:   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
147:   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
148:   PetscInt *icol1 = &icol_in[irow_in[i1]];
149:   PetscInt *icol2 = &icol_in[irow_in[i2]];

151:   if (nnz1 < nnz2) return -1;
152:   if (nnz1 > nnz2) return 1;

154:   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
155:     for (j = 0; j < nnz1; j++) {
156:       if (icol1[j] < icol2[j]) return -1;
157:       if (icol1[j] > icol2[j]) return 1;
158:     }
159:   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
160:     for (j = 0; j < nnz1; j++) {
161:       if (icol1[j] - i1 < icol2[j] - i2) return -1;
162:       if (icol1[j] - i1 > icol2[j] - i2) return 1;
163:     }
164:   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
165:     for (j = 1; j < nnz1; j++) {
166:       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
167:       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
168:     }
169:   }
170:   return 0;
171: }

173: /*
174:   spbas_mergesort_icols:
175:     return a sorting of the rows in which identical sparseness patterns are
176:     next to each other
177: */
178: static PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort)
179: {
180:   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
181:   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
182:   PetscInt *ialloc;               /* Allocated arrays */
183:   PetscInt *iswap;                /* auxiliary pointers for swapping */
184:   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
185:   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */

187:   PetscFunctionBegin;
188:   PetscCall(PetscMalloc1(nrows, &ialloc));

190:   ihlp1 = ialloc;
191:   ihlp2 = isort;

193:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
194:   for (PetscInt istep = 1; istep < nrows; istep *= 2) {
195:     /*
196:       Combine sorted parts
197:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
198:       of ihlp2 and vhlp2

200:       into one sorted part
201:           istart:istart+2*istep-1
202:       of ihlp1 and vhlp1
203:     */
204:     for (istart = 0; istart < nrows; istart += 2 * istep) {
205:       /* Set counters and bound array part endings */
206:       i1    = istart;
207:       i1end = i1 + istep;
208:       if (i1end > nrows) i1end = nrows;
209:       i2    = istart + istep;
210:       i2end = i2 + istep;
211:       if (i2end > nrows) i2end = nrows;

213:       /* Merge the two array parts */
214:       for (i = istart; i < i2end; i++) {
215:         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
216:           ihlp1[i] = ihlp2[i1];
217:           i1++;
218:         } else if (i2 < i2end) {
219:           ihlp1[i] = ihlp2[i2];
220:           i2++;
221:         } else {
222:           ihlp1[i] = ihlp2[i1];
223:           i1++;
224:         }
225:       }
226:     }

228:     /* Swap the two array sets */
229:     iswap = ihlp2;
230:     ihlp2 = ihlp1;
231:     ihlp1 = iswap;
232:   }

234:   /* Copy one more time in case the sorted arrays are the temporary ones */
235:   if (ihlp2 != isort) {
236:     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
237:   }
238:   PetscCall(PetscFree(ialloc));
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }

242: /*
243:   spbas_compress_pattern:
244:      calculate a compressed sparseness pattern for a sparseness pattern
245:      given in compressed row storage. The compressed sparseness pattern may
246:      require (much) less memory.
247: */
248: PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction)
249: {
250:   PetscInt        nnz      = irow_in[nrows];
251:   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
252:   size_t          mem_compressed;
253:   PetscInt       *isort;
254:   PetscInt       *icols;
255:   PetscInt        row_nnz;
256:   PetscInt       *ipoint;
257:   PetscBool      *used;
258:   PetscInt        ptr;
259:   PetscInt        i, j;
260:   const PetscBool no_values = PETSC_FALSE;

262:   PetscFunctionBegin;
263:   /* Allocate the structure of the new matrix */
264:   B->nrows        = nrows;
265:   B->ncols        = ncols;
266:   B->nnz          = nnz;
267:   B->col_idx_type = col_idx_type;
268:   B->block_data   = PETSC_TRUE;

270:   PetscCall(spbas_allocate_pattern(B, no_values));

272:   /* When using an offset array, set it */
273:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
274:     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
275:   }

277:   /* Allocate the ordering for the rows */
278:   PetscCall(PetscMalloc1(nrows, &isort));
279:   PetscCall(PetscMalloc1(nrows, &ipoint));
280:   PetscCall(PetscCalloc1(nrows, &used));

282:   for (i = 0; i < nrows; i++) {
283:     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
284:     isort[i]      = i;
285:     ipoint[i]     = i;
286:   }

288:   /* Sort the rows so that identical columns will be next to each other */
289:   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
290:   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));

292:   /* Replace identical rows with the first one in the list */
293:   for (i = 1; i < nrows; i++) {
294:     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
295:   }

297:   /* Collect the rows which are used*/
298:   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;

300:   /* Calculate needed memory */
301:   B->n_alloc_icol = 0;
302:   for (i = 0; i < nrows; i++) {
303:     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
304:   }
305:   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));

307:   /* Fill in the diagonal offsets for the rows which store their own data */
308:   ptr = 0;
309:   for (i = 0; i < B->nrows; i++) {
310:     if (used[i]) {
311:       B->icols[i] = &B->alloc_icol[ptr];
312:       icols       = &icol_in[irow_in[i]];
313:       row_nnz     = B->row_nnz[i];
314:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
315:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
316:       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
317:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
318:       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
319:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
320:       }
321:       ptr += B->row_nnz[i];
322:     }
323:   }

325:   /* Point to the right places for all data */
326:   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
327:   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
328:   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));

330:   PetscCall(PetscFree(isort));
331:   PetscCall(PetscFree(used));
332:   PetscCall(PetscFree(ipoint));

334:   mem_compressed = spbas_memory_requirement(*B);
335:   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
336:   PetscFunctionReturn(PETSC_SUCCESS);
337: }

339: /*
340:    spbas_incomplete_cholesky
341:        Incomplete Cholesky decomposition
342: */
343: #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>

345: /*
346:   spbas_delete : de-allocate the arrays owned by this matrix
347: */
348: PetscErrorCode spbas_delete(spbas_matrix matrix)
349: {
350:   PetscInt i;

352:   PetscFunctionBegin;
353:   if (matrix.block_data) {
354:     PetscCall(PetscFree(matrix.alloc_icol));
355:     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
356:   } else {
357:     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
358:     PetscCall(PetscFree(matrix.icols));
359:     if (matrix.values) {
360:       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
361:     }
362:   }

364:   PetscCall(PetscFree(matrix.row_nnz));
365:   PetscCall(PetscFree(matrix.icols));
366:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
367:   PetscCall(PetscFree(matrix.values));
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }

371: /*
372: spbas_matrix_to_crs:
373:    Convert an spbas_matrix to compessed row storage
374: */
375: PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
376: {
377:   PetscInt     nrows = matrix_A.nrows;
378:   PetscInt     nnz   = matrix_A.nnz;
379:   PetscInt     i, j, r_nnz, i0;
380:   PetscInt    *irow;
381:   PetscInt    *icol;
382:   PetscInt    *icol_A;
383:   MatScalar   *val;
384:   PetscScalar *val_A;
385:   PetscInt     col_idx_type = matrix_A.col_idx_type;
386:   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;

388:   PetscFunctionBegin;
389:   PetscCall(PetscMalloc1(nrows + 1, &irow));
390:   PetscCall(PetscMalloc1(nnz, &icol));
391:   *icol_out = icol;
392:   *irow_out = irow;
393:   if (do_values) {
394:     PetscCall(PetscMalloc1(nnz, &val));
395:     *val_out  = val;
396:     *icol_out = icol;
397:     *irow_out = irow;
398:   }

400:   irow[0] = 0;
401:   for (i = 0; i < nrows; i++) {
402:     r_nnz       = matrix_A.row_nnz[i];
403:     i0          = irow[i];
404:     irow[i + 1] = i0 + r_nnz;
405:     icol_A      = matrix_A.icols[i];

407:     if (do_values) {
408:       val_A = matrix_A.values[i];
409:       for (j = 0; j < r_nnz; j++) {
410:         icol[i0 + j] = icol_A[j];
411:         val[i0 + j]  = val_A[j];
412:       }
413:     } else {
414:       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
415:     }

417:     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
418:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
419:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
420:       i0 = matrix_A.icol0[i];
421:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
422:     }
423:   }
424:   PetscFunctionReturn(PETSC_SUCCESS);
425: }

427: /*
428:     spbas_transpose
429:        return the transpose of a matrix
430: */
431: PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result)
432: {
433:   PetscInt     col_idx_type = in_matrix.col_idx_type;
434:   PetscInt     nnz          = in_matrix.nnz;
435:   PetscInt     ncols        = in_matrix.nrows;
436:   PetscInt     nrows        = in_matrix.ncols;
437:   PetscInt     i, j, k;
438:   PetscInt     r_nnz;
439:   PetscInt    *irow;
440:   PetscInt     icol0 = 0;
441:   PetscScalar *val;

443:   PetscFunctionBegin;
444:   /* Copy input values */
445:   result->nrows        = nrows;
446:   result->ncols        = ncols;
447:   result->nnz          = nnz;
448:   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
449:   result->block_data   = PETSC_TRUE;

451:   /* Allocate sparseness pattern */
452:   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

454:   /*  Count the number of nonzeros in each row */
455:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

457:   for (i = 0; i < ncols; i++) {
458:     r_nnz = in_matrix.row_nnz[i];
459:     irow  = in_matrix.icols[i];
460:     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
461:       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
462:     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
463:       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
464:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
465:       icol0 = in_matrix.icol0[i];
466:       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
467:     }
468:   }

470:   /* Set the pointers to the data */
471:   PetscCall(spbas_allocate_data(result));

473:   /* Reset the number of nonzeros in each row */
474:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

476:   /* Fill the data arrays */
477:   if (in_matrix.values) {
478:     for (i = 0; i < ncols; i++) {
479:       r_nnz = in_matrix.row_nnz[i];
480:       irow  = in_matrix.icols[i];
481:       val   = in_matrix.values[i];

483:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
484:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
485:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
486:       for (j = 0; j < r_nnz; j++) {
487:         k                                     = icol0 + irow[j];
488:         result->icols[k][result->row_nnz[k]]  = i;
489:         result->values[k][result->row_nnz[k]] = val[j];
490:         result->row_nnz[k]++;
491:       }
492:     }
493:   } else {
494:     for (i = 0; i < ncols; i++) {
495:       r_nnz = in_matrix.row_nnz[i];
496:       irow  = in_matrix.icols[i];

498:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
499:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
500:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];

502:       for (j = 0; j < r_nnz; j++) {
503:         k                                    = icol0 + irow[j];
504:         result->icols[k][result->row_nnz[k]] = i;
505:         result->row_nnz[k]++;
506:       }
507:     }
508:   }
509:   PetscFunctionReturn(PETSC_SUCCESS);
510: }

512: /*
513:    spbas_mergesort

515:       mergesort for an array of integers and an array of associated
516:       reals

518:       on output, icol[0..nnz-1] is increasing;
519:                   val[0..nnz-1] has undergone the same permutation as icol

521:       NB: val may be NULL: in that case, only the integers are sorted

523: */
524: static PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
525: {
526:   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
527:   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
528:   PetscInt    *ialloc;               /* Allocated arrays */
529:   PetscScalar *valloc = NULL;
530:   PetscInt    *iswap; /* auxiliary pointers for swapping */
531:   PetscScalar *vswap;
532:   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
533:   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
534:   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
535:   PetscScalar *vhlp2 = NULL;

537:   PetscFunctionBegin;
538:   PetscCall(PetscMalloc1(nnz, &ialloc));
539:   ihlp1 = ialloc;
540:   ihlp2 = icol;

542:   if (val) {
543:     PetscCall(PetscMalloc1(nnz, &valloc));
544:     vhlp1 = valloc;
545:     vhlp2 = val;
546:   }

548:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
549:   for (PetscInt istep = 1; istep < nnz; istep *= 2) {
550:     /*
551:       Combine sorted parts
552:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
553:       of ihlp2 and vhlp2

555:       into one sorted part
556:           istart:istart+2*istep-1
557:       of ihlp1 and vhlp1
558:     */
559:     for (istart = 0; istart < nnz; istart += 2 * istep) {
560:       /* Set counters and bound array part endings */
561:       i1    = istart;
562:       i1end = i1 + istep;
563:       if (i1end > nnz) i1end = nnz;
564:       i2    = istart + istep;
565:       i2end = i2 + istep;
566:       if (i2end > nnz) i2end = nnz;

568:       /* Merge the two array parts */
569:       if (val) {
570:         for (i = istart; i < i2end; i++) {
571:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
572:             ihlp1[i] = ihlp2[i1];
573:             vhlp1[i] = vhlp2[i1];
574:             i1++;
575:           } else if (i2 < i2end) {
576:             ihlp1[i] = ihlp2[i2];
577:             vhlp1[i] = vhlp2[i2];
578:             i2++;
579:           } else {
580:             ihlp1[i] = ihlp2[i1];
581:             vhlp1[i] = vhlp2[i1];
582:             i1++;
583:           }
584:         }
585:       } else {
586:         for (i = istart; i < i2end; i++) {
587:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
588:             ihlp1[i] = ihlp2[i1];
589:             i1++;
590:           } else if (i2 < i2end) {
591:             ihlp1[i] = ihlp2[i2];
592:             i2++;
593:           } else {
594:             ihlp1[i] = ihlp2[i1];
595:             i1++;
596:           }
597:         }
598:       }
599:     }

601:     /* Swap the two array sets */
602:     iswap = ihlp2;
603:     ihlp2 = ihlp1;
604:     ihlp1 = iswap;
605:     vswap = vhlp2;
606:     vhlp2 = vhlp1;
607:     vhlp1 = vswap;
608:   }

610:   /* Copy one more time in case the sorted arrays are the temporary ones */
611:   if (ihlp2 != icol) {
612:     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
613:     if (val) {
614:       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
615:     }
616:   }

618:   PetscCall(PetscFree(ialloc));
619:   if (val) PetscCall(PetscFree(valloc));
620:   PetscFunctionReturn(PETSC_SUCCESS);
621: }

623: /*
624:   spbas_apply_reordering_rows:
625:     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
626: */
627: static PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
628: {
629:   PetscInt      i, j, ip;
630:   PetscInt      nrows = matrix_A->nrows;
631:   PetscInt     *row_nnz;
632:   PetscInt    **icols;
633:   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
634:   PetscScalar **vals      = NULL;

636:   PetscFunctionBegin;
637:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

639:   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
640:   PetscCall(PetscMalloc1(nrows, &row_nnz));
641:   PetscCall(PetscMalloc1(nrows, &icols));

643:   for (i = 0; i < nrows; i++) {
644:     ip = permutation[i];
645:     if (do_values) vals[i] = matrix_A->values[ip];
646:     icols[i]   = matrix_A->icols[ip];
647:     row_nnz[i] = matrix_A->row_nnz[ip];
648:     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
649:   }

651:   if (do_values) PetscCall(PetscFree(matrix_A->values));
652:   PetscCall(PetscFree(matrix_A->icols));
653:   PetscCall(PetscFree(matrix_A->row_nnz));

655:   if (do_values) matrix_A->values = vals;
656:   matrix_A->icols   = icols;
657:   matrix_A->row_nnz = row_nnz;
658:   PetscFunctionReturn(PETSC_SUCCESS);
659: }

661: /*
662:   spbas_apply_reordering_cols:
663:     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
664: */
665: static PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation)
666: {
667:   PetscInt     i, j;
668:   PetscInt     nrows = matrix_A->nrows;
669:   PetscInt     row_nnz;
670:   PetscInt    *icols;
671:   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
672:   PetscScalar *vals      = NULL;

674:   PetscFunctionBegin;
675:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

677:   for (i = 0; i < nrows; i++) {
678:     icols   = matrix_A->icols[i];
679:     row_nnz = matrix_A->row_nnz[i];
680:     if (do_values) vals = matrix_A->values[i];

682:     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
683:     PetscCall(spbas_mergesort(row_nnz, icols, vals));
684:   }
685:   PetscFunctionReturn(PETSC_SUCCESS);
686: }

688: /*
689:   spbas_apply_reordering:
690:     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
691: */
692: PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm)
693: {
694:   PetscFunctionBegin;
695:   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
696:   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
697:   PetscFunctionReturn(PETSC_SUCCESS);
698: }

700: PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result)
701: {
702:   spbas_matrix retval;
703:   PetscInt     i, j, i0, r_nnz;

705:   PetscFunctionBegin;
706:   /* Copy input values */
707:   retval.nrows = nrows;
708:   retval.ncols = ncols;
709:   retval.nnz   = ai[nrows];

711:   retval.block_data   = PETSC_TRUE;
712:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;

714:   /* Allocate output matrix */
715:   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
716:   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
717:   PetscCall(spbas_allocate_data(&retval));
718:   /* Copy the structure */
719:   for (i = 0; i < retval.nrows; i++) {
720:     i0    = ai[i];
721:     r_nnz = ai[i + 1] - i0;

723:     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
724:   }
725:   *result = retval;
726:   PetscFunctionReturn(PETSC_SUCCESS);
727: }

729: /*
730:    spbas_mark_row_power:
731:       Mark the columns in row 'row' which are nonzero in
732:           matrix^2log(marker).
733: */
734: static PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
735:                                            PetscInt      row,       /* row for which the columns are marked */
736:                                            spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
737:                                            PetscInt      marker,    /* marker-value: 2^power */
738:                                            PetscInt      minmrk,    /* lower bound for marked points */
739:                                            PetscInt      maxmrk)    /* upper bound for marked points */
740: {
741:   PetscInt i, j, nnz;

743:   PetscFunctionBegin;
744:   nnz = in_matrix->row_nnz[row];

746:   /* For higher powers, call this function recursively */
747:   if (marker > 1) {
748:     for (i = 0; i < nnz; i++) {
749:       j = row + in_matrix->icols[row][i];
750:       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
751:         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
752:         iwork[j] |= marker;
753:       }
754:     }
755:   } else {
756:     /*  Mark the columns reached */
757:     for (i = 0; i < nnz; i++) {
758:       j = row + in_matrix->icols[row][i];
759:       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
760:     }
761:   }
762:   PetscFunctionReturn(PETSC_SUCCESS);
763: }

765: /*
766:    spbas_power
767:       Calculate sparseness patterns for incomplete Cholesky decompositions
768:       of a given order: (almost) all nonzeros of the matrix^(order+1) which
769:       are inside the band width are found and stored in the output sparseness
770:       pattern.
771: */
772: PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result)
773: {
774:   spbas_matrix retval;
775:   PetscInt     nrows = in_matrix.nrows;
776:   PetscInt     ncols = in_matrix.ncols;
777:   PetscInt     i, j, kend;
778:   PetscInt     nnz, inz;
779:   PetscInt    *iwork;
780:   PetscInt     marker;
781:   PetscInt     maxmrk = 0;

783:   PetscFunctionBegin;
784:   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
785:   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
786:   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
787:   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");

789:   /* Copy input values*/
790:   retval.nrows        = ncols;
791:   retval.ncols        = nrows;
792:   retval.nnz          = 0;
793:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
794:   retval.block_data   = PETSC_FALSE;

796:   /* Allocate sparseness pattern */
797:   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

799:   /* Allocate marker array: note sure the max needed so use the max of the two */
800:   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));

802:   /* Calculate marker values */
803:   marker = 1;
804:   for (i = 1; i < power; i++) marker *= 2;

806:   for (i = 0; i < nrows; i++) {
807:     /* Calculate the pattern for each row */

809:     nnz  = in_matrix.row_nnz[i];
810:     kend = i + in_matrix.icols[i][nnz - 1];
811:     if (maxmrk <= kend) maxmrk = kend + 1;
812:     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));

814:     /* Count the columns*/
815:     nnz = 0;
816:     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);

818:     /* Allocate the column indices */
819:     retval.row_nnz[i] = nnz;
820:     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));

822:     /* Administrate the column indices */
823:     inz = 0;
824:     for (j = i; j < maxmrk; j++) {
825:       if (iwork[j]) {
826:         retval.icols[i][inz] = j - i;
827:         inz++;
828:         iwork[j] = 0;
829:       }
830:     }
831:     retval.nnz += nnz;
832:   }
833:   PetscCall(PetscFree(iwork));
834:   *result = retval;
835:   PetscFunctionReturn(PETSC_SUCCESS);
836: }

838: /*
839:    spbas_keep_upper:
840:       remove the lower part of the matrix: keep the upper part
841: */
842: PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix)
843: {
844:   PetscInt jstart;

846:   PetscFunctionBegin;
847:   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
848:   for (PetscInt i = 0; i < inout_matrix->nrows; i++) {
849:     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
850:     if (jstart > 0) {
851:       for (PetscInt j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];

853:       if (inout_matrix->values) {
854:         for (PetscInt j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
855:       }

857:       inout_matrix->row_nnz[i] -= jstart;

859:       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));

861:       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
862:       inout_matrix->nnz -= jstart;
863:     }
864:   }
865:   PetscFunctionReturn(PETSC_SUCCESS);
866: }