Actual source code: spbas_cholesky.h

  1: #pragma once
  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
  9: {
 10:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 11:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 12:   *n_alloc_used += r_nnz;
 13:   return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
 14: }

 16: /*
 17:   spbas_cholesky_garbage_collect:
 18:      move the rows which have been calculated so far, as well as
 19:      those currently under construction, to the front of the
 20:      array, while putting them in the proper order.
 21:      When it seems necessary, enlarge the current arrays.

 23:      NB: re-allocation is being simulated using
 24:          PetscMalloc, memcpy, PetscFree, because
 25:          PetscRealloc does not seem to exist.

 27: */
 28: static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 29:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 30:                                                      PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
 31:                                                      PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 32:                                                                                     places in the arrays: they need not be moved any more */
 33:                                                      PetscInt     *n_alloc_used,   /* I/O:  */
 34:                                                      PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
 35: {
 36:   /* PSEUDO-CODE:
 37:   1. Choose the appropriate size for the arrays
 38:   2. Rescue the arrays which would be lost during garbage collection
 39:   3. Reallocate and correct administration
 40:   4. Move all arrays so that they are in proper order */

 42:   PetscInt        i, j;
 43:   PetscInt        nrows          = result->nrows;
 44:   PetscInt        n_alloc_ok     = 0;
 45:   PetscInt        n_alloc_ok_max = 0;
 46:   PetscInt        need_already   = 0;
 47:   PetscInt        max_need_extra = 0;
 48:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 49:   PetscInt        n_alloc_now    = result->n_alloc_icol;
 50:   PetscInt       *alloc_icol_old = result->alloc_icol;
 51:   PetscScalar    *alloc_val_old  = result->alloc_val;
 52:   PetscInt       *icol_rescue;
 53:   PetscScalar    *val_rescue;
 54:   PetscInt        n_rescue;
 55:   PetscInt        i_here, i_last, n_copy;
 56:   const PetscReal xtra_perc = 20;

 58:   PetscFunctionBegin;
 59:   /*********************************************************
 60:   1. Choose appropriate array size
 61:   Count number of rows and memory usage which is already final */
 62:   for (i = 0; i < i_row; i++) {
 63:     n_alloc_ok += result->row_nnz[i];
 64:     n_alloc_ok_max += max_row_nnz[i];
 65:   }

 67:   /* Count currently needed memory usage and future memory requirements
 68:     (max, predicted)*/
 69:   for (i = i_row; i < nrows; i++) {
 70:     if (!result->row_nnz[i]) {
 71:       max_need_extra += max_row_nnz[i];
 72:     } else {
 73:       need_already += max_row_nnz[i];
 74:     }
 75:   }

 77:   /* Make maximal and realistic memory requirement estimates */
 78:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 79:   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));

 81:   /* Choose array sizes */
 82:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 83:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 84:   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
 85:   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));

 87:   /* If new estimate is less than what we already have,
 88:     don't reallocate, just garbage-collect */
 89:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;

 91:   /* Motivate dimension choice */
 92:   PetscCall(PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */
 93:   if (n_alloc_max == n_alloc_est) {
 94:     PetscCall(PetscInfo(NULL, "this is the correct size\n"));
 95:   } else if (n_alloc_now >= n_alloc_est) {
 96:     PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
 97:   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
 98:     PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
 99:   } else {
100:     PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
101:   }

103:   /**********************************************************
104:   2. Rescue arrays which would be lost
105:   Count how many rows and nonzeros will have to be rescued
106:   when moving all arrays in place */
107:   n_rescue = 0;
108:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
109:   else {
110:     i = *n_row_alloc_ok - 1;

112:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
113:   }

115:   for (i = *n_row_alloc_ok; i < nrows; i++) {
116:     i_here = result->icols[i] - result->alloc_icol;
117:     i_last = i_here + result->row_nnz[i];
118:     if (result->row_nnz[i] > 0) {
119:       if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];

121:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
122:       else *n_alloc_used += max_row_nnz[i];
123:     }
124:   }

126:   /* Allocate rescue arrays */
127:   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
128:   PetscCall(PetscMalloc1(n_rescue, &val_rescue));

130:   /* Rescue the arrays which need rescuing */
131:   n_rescue = 0;
132:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
133:   else {
134:     i             = *n_row_alloc_ok - 1;
135:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
136:   }

138:   for (i = *n_row_alloc_ok; i < nrows; i++) {
139:     i_here = result->icols[i] - result->alloc_icol;
140:     i_last = i_here + result->row_nnz[i];
141:     if (result->row_nnz[i] > 0) {
142:       if (*n_alloc_used > i_here || i_last > n_alloc) {
143:         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
144:         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
145:         n_rescue += result->row_nnz[i];
146:       }

148:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
149:       else *n_alloc_used += max_row_nnz[i];
150:     }
151:   }

153:   /**********************************************************
154:   3. Reallocate and correct administration */

156:   if (n_alloc != result->n_alloc_icol) {
157:     n_copy = PetscMin(n_alloc, result->n_alloc_icol);

159:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

161:         Allocate new icol-data, copy old contents */
162:     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
163:     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));

165:     /* Update administration, Reset pointers to new arrays  */
166:     result->n_alloc_icol = n_alloc;
167:     for (i = 0; i < nrows; i++) {
168:       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
169:       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
170:     }

172:     /* Delete old array */
173:     PetscCall(PetscFree(alloc_icol_old));

175:     /* Allocate new value-data, copy old contents */
176:     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
177:     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));

179:     /* Update administration, Reset pointers to new arrays  */
180:     result->n_alloc_val = n_alloc;
181:     for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);

183:     /* Delete old array */
184:     PetscCall(PetscFree(alloc_val_old));
185:   }

187:   /*********************************************************
188:   4. Copy all the arrays to their proper places */
189:   n_rescue = 0;
190:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
191:   else {
192:     i = *n_row_alloc_ok - 1;

194:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
195:   }

197:   for (i = *n_row_alloc_ok; i < nrows; i++) {
198:     i_here = result->icols[i] - result->alloc_icol;
199:     i_last = i_here + result->row_nnz[i];

201:     result->icols[i]  = result->alloc_icol + *n_alloc_used;
202:     result->values[i] = result->alloc_val + *n_alloc_used;

204:     if (result->row_nnz[i] > 0) {
205:       if (*n_alloc_used > i_here || i_last > n_alloc) {
206:         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
207:         PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));

209:         n_rescue += result->row_nnz[i];
210:       } else {
211:         for (j = 0; j < result->row_nnz[i]; j++) {
212:           result->icols[i][j]  = result->alloc_icol[i_here + j];
213:           result->values[i][j] = result->alloc_val[i_here + j];
214:         }
215:       }
216:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
217:       else *n_alloc_used += max_row_nnz[i];
218:     }
219:   }

221:   /* Delete the rescue arrays */
222:   PetscCall(PetscFree(icol_rescue));
223:   PetscCall(PetscFree(val_rescue));

225:   *n_row_alloc_ok = i_row;
226:   PetscFunctionReturn(PETSC_SUCCESS);
227: }

229: /*
230:   spbas_incomplete_cholesky:
231:      incomplete Cholesky decomposition of a square, symmetric,
232:      positive definite matrix.

234:      In case negative diagonals are encountered, function returns
235:      NEGATIVE_DIAGONAL. When this happens, call this function again
236:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
237:      droptol
238: */
239: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
240: {
241:   PetscInt        jL;
242:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
243:   PetscInt       *ai = a->i, *aj = a->j;
244:   MatScalar      *aa = a->a;
245:   PetscInt        nrows, ncols;
246:   PetscInt       *max_row_nnz;
247:   spbas_matrix    retval;
248:   PetscScalar    *diag;
249:   PetscScalar    *val;
250:   PetscScalar    *lvec;
251:   PetscScalar     epsdiag;
252:   PetscInt        i, j, k;
253:   const PetscBool do_values = PETSC_TRUE;
254:   PetscInt       *r1_icol;
255:   PetscScalar    *r1_val;
256:   PetscInt       *r_icol;
257:   PetscInt        r_nnz;
258:   PetscScalar    *r_val;
259:   PetscInt       *A_icol;
260:   PetscInt        A_nnz;
261:   PetscScalar    *A_val;
262:   PetscInt       *p_icol;
263:   PetscInt        p_nnz;
264:   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
265:   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */

267:   PetscFunctionBegin;
268:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
269:   PetscCall(MatGetSize(A, &nrows, &ncols));
270:   PetscCall(MatGetTrace(A, &epsdiag));

272:   epsdiag *= epsdiag_in / nrows;

274:   PetscCall(PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));

276:   if (droptol < 1e-10) droptol = 1e-10;

278:   retval.nrows        = nrows;
279:   retval.ncols        = nrows;
280:   retval.nnz          = pattern.nnz / 10;
281:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
282:   retval.block_data   = PETSC_TRUE;

284:   PetscCall(spbas_allocate_pattern(&retval, do_values));
285:   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
286:   PetscCall(spbas_allocate_data(&retval));
287:   retval.nnz = 0;

289:   PetscCall(PetscMalloc1(nrows, &diag));
290:   PetscCall(PetscCalloc1(nrows, &val));
291:   PetscCall(PetscCalloc1(nrows, &lvec));
292:   PetscCall(PetscCalloc1(nrows, &max_row_nnz));

294:   /* Count the nonzeros on transpose of pattern */
295:   for (i = 0; i < nrows; i++) {
296:     p_nnz  = pattern.row_nnz[i];
297:     p_icol = pattern.icols[i];
298:     for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
299:   }

301:   /* Calculate rows of L */
302:   for (i = 0; i < nrows; i++) {
303:     p_nnz  = pattern.row_nnz[i];
304:     p_icol = pattern.icols[i];

306:     r_nnz  = retval.row_nnz[i];
307:     r_icol = retval.icols[i];
308:     r_val  = retval.values[i];
309:     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
310:     A_icol = &aj[ai[rip[i]]];
311:     A_val  = &aa[ai[rip[i]]];

313:     /* Calculate  val += A(i,i:n)'; */
314:     for (j = 0; j < A_nnz; j++) {
315:       k = riip[A_icol[j]];
316:       if (k >= i) val[k] = A_val[j];
317:     }

319:     /*  Add regularization */
320:     val[i] += epsdiag;

322:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
323:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
324:     for (j = 0; j < r_nnz; j++) {
325:       k       = r_icol[j];
326:       lvec[k] = diag[k] * r_val[j];
327:       val[i] -= r_val[j] * lvec[k];
328:     }

330:     /* Calculate the new diagonal */
331:     diag[i] = val[i];
332:     if (PetscRealPart(diag[i]) < droptol) {
333:       PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
334:       PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));

336:       /* Delete the whole matrix at once. */
337:       PetscCall(spbas_delete(retval));
338:       *success = PETSC_FALSE;
339:       PetscFunctionReturn(PETSC_SUCCESS);
340:     }

342:     /* If necessary, allocate arrays */
343:     if (r_nnz == 0) {
344:       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
345:       PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
346:       r_icol = retval.icols[i];
347:       r_val  = retval.values[i];
348:     }

350:     /* Now, fill in */
351:     r_icol[r_nnz] = i;
352:     r_val[r_nnz]  = 1.0;
353:     r_nnz++;
354:     retval.row_nnz[i]++;

356:     retval.nnz += r_nnz;

358:     /* Calculate
359:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
360:     for (j = 1; j < p_nnz; j++) {
361:       k       = i + p_icol[j];
362:       r1_icol = retval.icols[k];
363:       r1_val  = retval.values[k];
364:       for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
365:       val[k] /= diag[i];

367:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
368:         /* If necessary, allocate arrays */
369:         if (!retval.row_nnz[k]) {
370:           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
371:           if (!success) {
372:             PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
373:             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
374:             PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
375:             r_icol = retval.icols[i];
376:           }
377:         }

379:         retval.icols[k][retval.row_nnz[k]]  = i;
380:         retval.values[k][retval.row_nnz[k]] = val[k];
381:         retval.row_nnz[k]++;
382:       }
383:       val[k] = 0;
384:     }

386:     /* Erase the values used in the work arrays */
387:     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
388:   }

390:   PetscCall(PetscFree(lvec));
391:   PetscCall(PetscFree(val));

393:   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
394:   PetscCall(PetscFree(max_row_nnz));

396:   /* Place the inverse of the diagonals in the matrix */
397:   for (i = 0; i < nrows; i++) {
398:     r_nnz = retval.row_nnz[i];

400:     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
401:     for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
402:   }
403:   PetscCall(PetscFree(diag));
404:   *matrix_L = retval;
405:   *success  = PETSC_TRUE;
406:   PetscFunctionReturn(PETSC_SUCCESS);
407: }