Actual source code: spbas_cholesky.h
1: #pragma once
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
9: {
10: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
11: retval.values[k] = &retval.alloc_val[*n_alloc_used];
12: *n_alloc_used += r_nnz;
13: return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
14: }
16: /*
17: spbas_cholesky_garbage_collect:
18: move the rows which have been calculated so far, as well as
19: those currently under construction, to the front of the
20: array, while putting them in the proper order.
21: When it seems necessary, enlarge the current arrays.
23: NB: re-allocation is being simulated using
24: PetscMalloc, memcpy, PetscFree, because
25: PetscRealloc does not seem to exist.
27: */
28: static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
29: Only the storage, not the contents of this matrix is changed in this function */
30: PetscInt i_row, /* I : Number of rows for which the final contents are known */
31: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
32: places in the arrays: they need not be moved any more */
33: PetscInt *n_alloc_used, /* I/O: */
34: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
35: {
36: /* PSEUDO-CODE:
37: 1. Choose the appropriate size for the arrays
38: 2. Rescue the arrays which would be lost during garbage collection
39: 3. Reallocate and correct administration
40: 4. Move all arrays so that they are in proper order */
42: PetscInt i, j;
43: PetscInt nrows = result->nrows;
44: PetscInt n_alloc_ok = 0;
45: PetscInt n_alloc_ok_max = 0;
46: PetscInt need_already = 0;
47: PetscInt max_need_extra = 0;
48: PetscInt n_alloc_max, n_alloc_est, n_alloc;
49: PetscInt n_alloc_now = result->n_alloc_icol;
50: PetscInt *alloc_icol_old = result->alloc_icol;
51: PetscScalar *alloc_val_old = result->alloc_val;
52: PetscInt *icol_rescue;
53: PetscScalar *val_rescue;
54: PetscInt n_rescue;
55: PetscInt i_here, i_last, n_copy;
56: const PetscReal xtra_perc = 20;
58: PetscFunctionBegin;
59: /*********************************************************
60: 1. Choose appropriate array size
61: Count number of rows and memory usage which is already final */
62: for (i = 0; i < i_row; i++) {
63: n_alloc_ok += result->row_nnz[i];
64: n_alloc_ok_max += max_row_nnz[i];
65: }
67: /* Count currently needed memory usage and future memory requirements
68: (max, predicted)*/
69: for (i = i_row; i < nrows; i++) {
70: if (!result->row_nnz[i]) {
71: max_need_extra += max_row_nnz[i];
72: } else {
73: need_already += max_row_nnz[i];
74: }
75: }
77: /* Make maximal and realistic memory requirement estimates */
78: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
79: n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
81: /* Choose array sizes */
82: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
83: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
84: else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
85: else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
87: /* If new estimate is less than what we already have,
88: don't reallocate, just garbage-collect */
89: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
91: /* Motivate dimension choice */
92: PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */
93: if (n_alloc_max == n_alloc_est) {
94: PetscCall(PetscInfo(NULL, "this is the correct size\n"));
95: } else if (n_alloc_now >= n_alloc_est) {
96: PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
97: } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
98: PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
99: } else {
100: PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
101: }
103: /**********************************************************
104: 2. Rescue arrays which would be lost
105: Count how many rows and nonzeros will have to be rescued
106: when moving all arrays in place */
107: n_rescue = 0;
108: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
109: else {
110: i = *n_row_alloc_ok - 1;
112: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
113: }
115: for (i = *n_row_alloc_ok; i < nrows; i++) {
116: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
117: i_last = i_here + result->row_nnz[i];
118: if (result->row_nnz[i] > 0) {
119: if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
121: if (i < i_row) *n_alloc_used += result->row_nnz[i];
122: else *n_alloc_used += max_row_nnz[i];
123: }
124: }
126: /* Allocate rescue arrays */
127: PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
128: PetscCall(PetscMalloc1(n_rescue, &val_rescue));
130: /* Rescue the arrays which need rescuing */
131: n_rescue = 0;
132: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
133: else {
134: i = *n_row_alloc_ok - 1;
135: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
136: }
138: for (i = *n_row_alloc_ok; i < nrows; i++) {
139: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
140: i_last = i_here + result->row_nnz[i];
141: if (result->row_nnz[i] > 0) {
142: if (*n_alloc_used > i_here || i_last > n_alloc) {
143: PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
144: PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
145: n_rescue += result->row_nnz[i];
146: }
148: if (i < i_row) *n_alloc_used += result->row_nnz[i];
149: else *n_alloc_used += max_row_nnz[i];
150: }
151: }
153: /**********************************************************
154: 3. Reallocate and correct administration */
156: if (n_alloc != result->n_alloc_icol) {
157: n_copy = PetscMin(n_alloc, result->n_alloc_icol);
159: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
161: Allocate new icol-data, copy old contents */
162: PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
163: PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
165: /* Update administration, Reset pointers to new arrays */
166: result->n_alloc_icol = n_alloc;
167: for (i = 0; i < nrows; i++) {
168: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
169: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
170: }
172: /* Delete old array */
173: PetscCall(PetscFree(alloc_icol_old));
175: /* Allocate new value-data, copy old contents */
176: PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
177: PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
179: /* Update administration, Reset pointers to new arrays */
180: result->n_alloc_val = n_alloc;
181: for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
183: /* Delete old array */
184: PetscCall(PetscFree(alloc_val_old));
185: }
187: /*********************************************************
188: 4. Copy all the arrays to their proper places */
189: n_rescue = 0;
190: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
191: else {
192: i = *n_row_alloc_ok - 1;
194: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
195: }
197: for (i = *n_row_alloc_ok; i < nrows; i++) {
198: PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
199: i_last = i_here + result->row_nnz[i];
201: result->icols[i] = result->alloc_icol + *n_alloc_used;
202: result->values[i] = result->alloc_val + *n_alloc_used;
204: if (result->row_nnz[i] > 0) {
205: if (*n_alloc_used > i_here || i_last > n_alloc) {
206: PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
207: PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
209: n_rescue += result->row_nnz[i];
210: } else {
211: for (j = 0; j < result->row_nnz[i]; j++) {
212: result->icols[i][j] = result->alloc_icol[i_here + j];
213: result->values[i][j] = result->alloc_val[i_here + j];
214: }
215: }
216: if (i < i_row) *n_alloc_used += result->row_nnz[i];
217: else *n_alloc_used += max_row_nnz[i];
218: }
219: }
221: /* Delete the rescue arrays */
222: PetscCall(PetscFree(icol_rescue));
223: PetscCall(PetscFree(val_rescue));
225: *n_row_alloc_ok = i_row;
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: /*
230: spbas_incomplete_cholesky:
231: incomplete Cholesky decomposition of a square, symmetric,
232: positive definite matrix.
234: In case negative diagonals are encountered, function returns
235: NEGATIVE_DIAGONAL. When this happens, call this function again
236: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
237: droptol
238: */
239: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
240: {
241: PetscInt jL;
242: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
243: PetscInt *ai = a->i, *aj = a->j;
244: MatScalar *aa = a->a;
245: PetscInt nrows, ncols;
246: PetscInt *max_row_nnz;
247: spbas_matrix retval;
248: PetscScalar *diag;
249: PetscScalar *val;
250: PetscScalar *lvec;
251: PetscScalar epsdiag;
252: PetscInt i, j, k;
253: const PetscBool do_values = PETSC_TRUE;
254: PetscInt *r1_icol;
255: PetscScalar *r1_val;
256: PetscInt *r_icol;
257: PetscInt r_nnz;
258: PetscScalar *r_val;
259: PetscInt *A_icol;
260: PetscInt A_nnz;
261: PetscScalar *A_val;
262: PetscInt *p_icol;
263: PetscInt p_nnz;
264: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
265: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
267: PetscFunctionBegin;
268: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
269: PetscCall(MatGetSize(A, &nrows, &ncols));
270: PetscCall(MatGetTrace(A, &epsdiag));
272: epsdiag *= epsdiag_in / nrows;
274: PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
276: if (droptol < 1e-10) droptol = 1e-10;
278: retval.nrows = nrows;
279: retval.ncols = nrows;
280: retval.nnz = pattern.nnz / 10;
281: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
282: retval.block_data = PETSC_TRUE;
284: PetscCall(spbas_allocate_pattern(&retval, do_values));
285: PetscCall(PetscArrayzero(retval.row_nnz, nrows));
286: PetscCall(spbas_allocate_data(&retval));
287: retval.nnz = 0;
289: PetscCall(PetscMalloc1(nrows, &diag));
290: PetscCall(PetscCalloc1(nrows, &val));
291: PetscCall(PetscCalloc1(nrows, &lvec));
292: PetscCall(PetscCalloc1(nrows, &max_row_nnz));
294: /* Count the nonzeros on transpose of pattern */
295: for (i = 0; i < nrows; i++) {
296: p_nnz = pattern.row_nnz[i];
297: p_icol = pattern.icols[i];
298: for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
299: }
301: /* Calculate rows of L */
302: for (i = 0; i < nrows; i++) {
303: p_nnz = pattern.row_nnz[i];
304: p_icol = pattern.icols[i];
306: r_nnz = retval.row_nnz[i];
307: r_icol = retval.icols[i];
308: r_val = retval.values[i];
309: A_nnz = ai[rip[i] + 1] - ai[rip[i]];
310: A_icol = &aj[ai[rip[i]]];
311: A_val = &aa[ai[rip[i]]];
313: /* Calculate val += A(i,i:n)'; */
314: for (j = 0; j < A_nnz; j++) {
315: k = riip[A_icol[j]];
316: if (k >= i) val[k] = A_val[j];
317: }
319: /* Add regularization */
320: val[i] += epsdiag;
322: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
323: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
324: for (j = 0; j < r_nnz; j++) {
325: k = r_icol[j];
326: lvec[k] = diag[k] * r_val[j];
327: val[i] -= r_val[j] * lvec[k];
328: }
330: /* Calculate the new diagonal */
331: diag[i] = val[i];
332: if (PetscRealPart(diag[i]) < droptol) {
333: PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
334: PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
336: /* Delete the whole matrix at once. */
337: PetscCall(spbas_delete(retval));
338: *success = PETSC_FALSE;
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: /* If necessary, allocate arrays */
343: if (r_nnz == 0) {
344: PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
345: PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
346: r_icol = retval.icols[i];
347: r_val = retval.values[i];
348: }
350: /* Now, fill in */
351: r_icol[r_nnz] = i;
352: r_val[r_nnz] = 1.0;
353: r_nnz++;
354: retval.row_nnz[i]++;
356: retval.nnz += r_nnz;
358: /* Calculate
359: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
360: for (j = 1; j < p_nnz; j++) {
361: k = i + p_icol[j];
362: r1_icol = retval.icols[k];
363: r1_val = retval.values[k];
364: for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
365: val[k] /= diag[i];
367: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
368: /* If necessary, allocate arrays */
369: if (!retval.row_nnz[k]) {
370: PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
371: if (!success) {
372: PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
373: flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
374: PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
375: r_icol = retval.icols[i];
376: }
377: }
379: retval.icols[k][retval.row_nnz[k]] = i;
380: retval.values[k][retval.row_nnz[k]] = val[k];
381: retval.row_nnz[k]++;
382: }
383: val[k] = 0;
384: }
386: /* Erase the values used in the work arrays */
387: for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
388: }
390: PetscCall(PetscFree(lvec));
391: PetscCall(PetscFree(val));
393: PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
394: PetscCall(PetscFree(max_row_nnz));
396: /* Place the inverse of the diagonals in the matrix */
397: for (i = 0; i < nrows; i++) {
398: r_nnz = retval.row_nnz[i];
400: retval.values[i][r_nnz - 1] = 1.0 / diag[i];
401: for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
402: }
403: PetscCall(PetscFree(diag));
404: *matrix_L = retval;
405: *success = PETSC_TRUE;
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }