Actual source code: daensemble.c

  1: #include <petscda.h>
  2: #include <petsc/private/daimpl.h>
  3: #include <petscblaslapack.h>
  4: #include <petsc/private/daensembleimpl.h>

  6: /*
  7:      Code that is shared between multiple PetscDA ensemble methods including PETSCDAETKF and PETSCDALETKF

  9: */
 10: /*  T-Matrix Factorization and Application Methods [Alg 6.4 line 7] */

 12: /*
 13:    Tolerance for matrix square root verification in debug mode
 14:    Use a more relaxed tolerance to account for accumulated floating-point errors
 15:    in multiple matrix operations (Y^T * T * Y involves 3 matrix multiplications).
 16:    A tolerance of 1e-2 (1%) is reasonable for numerical verification. */
 17: #define MATRIX_SQRT_TOLERANCE_FACTOR 1.0e-2

 19: /*
 20:   PetscDAEnsembleTFactor_Cholesky - Computes Cholesky factorization of T

 22:   Input Parameters:
 23: + da - the PetscDA context
 24: - S  - normalized innovation matrix (obs_size x m)

 26:   Notes:
 27:   Computes the lower triangular Cholesky factor L such that T = L * L^T.
 28:   Then zeros out the upper triangular part to ensure L is strictly lower triangular.
 29: */
 30: static PetscErrorCode PetscDAEnsembleTFactor_Cholesky(PetscDA da)
 31: {
 32:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
 33:   PetscBLASInt      n, lda, info;
 34:   PetscScalar      *a_array;
 35:   PetscInt          m_T, N_T, i, j;

 37:   PetscFunctionBegin;
 38:   /* Initialize or update L_cholesky matrix */
 39:   if (!en->L_cholesky) {
 40:     PetscCall(MatDuplicate(en->I_StS, MAT_COPY_VALUES, &en->L_cholesky));
 41:   } else {
 42:     PetscCall(MatCopy(en->I_StS, en->L_cholesky, SAME_NONZERO_PATTERN));
 43:   }

 45:   /* Get matrix dimensions and convert to BLAS int */
 46:   PetscCall(MatGetSize(en->L_cholesky, &m_T, &N_T));
 47:   PetscCheck(m_T == N_T, PetscObjectComm((PetscObject)en->L_cholesky), PETSC_ERR_ARG_WRONG, "Matrix must be square for Cholesky");
 48:   PetscCall(PetscBLASIntCast(N_T, &n));
 49:   lda = n;

 51:   /* Get array from dense matrix */
 52:   PetscCall(MatDenseGetArrayWrite(en->L_cholesky, &a_array));

 54:   /* Compute Cholesky factorization: A = L * L^T (lower triangular) */
 55:   PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, a_array, &lda, &info));
 56:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK Cholesky factorization (xPOTRF): info=%" PetscInt_FMT ". Matrix T is not positive definite.", (PetscInt)info);

 58:   /* Zero out upper triangular part (LAPACK leaves it unchanged) */
 59:   for (j = 0; j < n; j++) {
 60:     for (i = 0; i < j; i++) a_array[i + j * lda] = 0.0;
 61:   }

 63:   /* Restore array and finalize matrix */
 64:   PetscCall(MatDenseRestoreArrayWrite(en->L_cholesky, &a_array));
 65:   PetscCall(MatAssemblyBegin(en->L_cholesky, MAT_FINAL_ASSEMBLY));
 66:   PetscCall(MatAssemblyEnd(en->L_cholesky, MAT_FINAL_ASSEMBLY));
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: /*
 71:   PetscDAEnsembleTFactor_Eigen - Computes Eigendecomposition of T

 73:   Input Parameters:
 74: + da - the PetscDA context
 75: - S  - normalized innovation matrix (obs_size x m)

 77:   Notes:
 78:   Computes eigenvectors V and eigenvalues D such that T = V * D * V^T.
 79: */
 80: static PetscErrorCode PetscDAEnsembleTFactor_Eigen(PetscDA da)
 81: {
 82:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
 83:   PetscBLASInt      n, lda, lwork, info;
 84:   PetscScalar      *a_array, *work, *eig_array;
 85:   PetscInt          m_V, N_V;
 86: #if defined(PETSC_USE_COMPLEX)
 87:   PetscReal *rwork = NULL;
 88: #endif

 90:   PetscFunctionBegin;
 91:   /* Initialize or update V matrix */
 92:   if (!en->V) {
 93:     PetscCall(MatDuplicate(en->I_StS, MAT_COPY_VALUES, &en->V));
 94:   } else {
 95:     PetscCall(MatCopy(en->I_StS, en->V, SAME_NONZERO_PATTERN));
 96:   }

 98:   /* Initialize or update eigenvalue vector */
 99:   if (!en->sqrt_eigen_vals) PetscCall(MatCreateVecs(en->I_StS, &en->sqrt_eigen_vals, NULL));

101:   /* Get matrix dimensions */
102:   PetscCall(MatGetSize(en->V, &m_V, &N_V));
103:   PetscCheck(m_V == N_V, PetscObjectComm((PetscObject)en->V), PETSC_ERR_ARG_WRONG, "Matrix must be square");
104:   PetscCall(PetscBLASIntCast(N_V, &n));
105:   lda = n;

107:   /* Get arrays */
108:   PetscCall(MatDenseGetArrayWrite(en->V, &a_array));
109:   PetscCall(VecGetArrayWrite(en->sqrt_eigen_vals, &eig_array));

111:   /* Query optimal workspace size */
112:   lwork = -1;
113:   PetscCall(PetscMalloc1(1, &work));
114: #if defined(PETSC_USE_COMPLEX)
115:   PetscCall(PetscMalloc1(PetscMax(1, 3 * n - 2), &rwork));
116:   PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, (PetscReal *)eig_array, work, &lwork, rwork, &info));
117: #else
118:   PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, eig_array, work, &lwork, &info));
119: #endif
120:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine xSYEV work query: info=%" PetscInt_FMT, (PetscInt)info);

122:   /* Allocate workspace */
123:   lwork = (PetscBLASInt)PetscRealPart(work[0]);
124:   PetscCall(PetscFree(work));
125:   PetscCall(PetscMalloc1(lwork, &work));

127:   /* Compute eigendecomposition */
128: #if defined(PETSC_USE_COMPLEX)
129:   PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, (PetscReal *)eig_array, work, &lwork, rwork, &info));
130:   PetscCall(PetscFree(rwork));
131: #else
132:   PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, eig_array, work, &lwork, &info));
133: #endif
134:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine xSYEV: info=%" PetscInt_FMT, (PetscInt)info);

136:   /* Cleanup */
137:   PetscCall(PetscFree(work));
138:   PetscCall(VecRestoreArrayWrite(en->sqrt_eigen_vals, &eig_array));
139:   PetscCall(MatDenseRestoreArrayWrite(en->V, &a_array));

141:   /* Compute sqrt(eigenvalues) */
142:   PetscCall(VecSqrtAbs(en->sqrt_eigen_vals));

144:   /* Debug verification: Ensure V * D * V^T == T */
145:   if (PetscDefined(USE_DEBUG)) {
146:     PetscReal norm_T, norm_diff, relative_error;
147:     Mat       V_D, VDVt;

149:     /* Compute D * V^T by scaling rows */
150:     PetscCall(MatDuplicate(en->V, MAT_COPY_VALUES, &V_D));

152:     /* Restore D for verification (since sqrt_eigen_vals currently holds sqrt(D)) */
153:     PetscCall(VecPointwiseMult(en->sqrt_eigen_vals, en->sqrt_eigen_vals, en->sqrt_eigen_vals));

155:     PetscCall(MatDiagonalScale(V_D, NULL, en->sqrt_eigen_vals));

157:     /* Compute V * D * V^T */
158:     PetscCall(MatMatTransposeMult(V_D, en->V, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &VDVt));

160:     /* Compute ||V*D*V^T - T|| / ||T|| */
161:     PetscCall(MatAXPY(VDVt, -1.0, en->I_StS, SAME_NONZERO_PATTERN));
162:     PetscCall(MatNorm(en->I_StS, NORM_FROBENIUS, &norm_T));
163:     PetscCall(MatNorm(VDVt, NORM_FROBENIUS, &norm_diff));

165:     PetscCheck(norm_T > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T = 0");
166:     relative_error = norm_diff / norm_T;
167:     PetscCheck(relative_error < MATRIX_SQRT_TOLERANCE_FACTOR, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "Eigendecomposition verification failed: ||V*D*V^T - T||/||T|| = %g", (double)relative_error);

169:     /* Restore sqrt(D) back to sqrt_eigen_vals */
170:     PetscCall(VecSqrtAbs(en->sqrt_eigen_vals));

172:     /* Cleanup debug matrices */
173:     PetscCall(MatDestroy(&V_D));
174:     PetscCall(MatDestroy(&VDVt));
175:   }
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: /*@
180:   PetscDAEnsembleTFactor - Compute and store factorization of T matrix

182:   Collective

184:   Input Parameters:
185: + da - the `PetscDA` context
186: - S  - normalized innovation matrix (obs_size x m)

188:   Notes:
189:   This function computes $T = I + S^T * S$ and stores its factorization based on
190:   the selected `PetscDASqrtType`.

192:   - For CHOLESKY mode: computes the lower triangular Cholesky factor $L$ such that $T = L * L^T$.
193:   - For EIGEN mode: computes eigenvectors $V$ and eigenvalues $D$ such that $T = V * D * V^T$.

195:   The implementation uses matrix reuse (`MAT_REUSE_MATRIX`) to minimize memory allocation
196:   overhead when the ensemble size remains constant across analysis cycles.

198:   Level: advanced

200: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleApplyTInverse()`, `PetscDAEnsembleApplySqrtTInverse()`
201: @*/
202: PetscErrorCode PetscDAEnsembleTFactor(PetscDA da, Mat S)
203: {
204:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
205:   PetscInt          m, s_rows, s_cols;
206:   MatReuse          scall = MAT_INITIAL_MATRIX;

208:   PetscFunctionBegin;
211:   PetscCall(MatGetSize(S, &s_rows, &s_cols));
212:   m = s_cols; /* Ensemble size */
213:   PetscCheck(m > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Innovation matrix S must have positive columns, got %" PetscInt_FMT, m);
214:   PetscCheck(m == en->size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "S matrix columns (%" PetscInt_FMT ") must match ensemble size (%" PetscInt_FMT ") defined in PetscDA", m, en->size);

216:   /* 2. Manage Resource Reuse */
217:   /* Check if we can reuse the T matrix (I_StS) and dependent factors */
218:   if (en->I_StS) {
219:     PetscInt t_rows, t_cols;
220:     PetscCall(MatGetSize(en->I_StS, &t_rows, &t_cols));

222:     /* If dimensions have changed, we must fully reallocate */
223:     if (t_rows != m || t_cols != m) {
224:       PetscCall(MatDestroy(&en->I_StS));
225:       PetscCall(MatDestroy(&en->V));
226:       PetscCall(MatDestroy(&en->L_cholesky));
227:       PetscCall(VecDestroy(&en->sqrt_eigen_vals));
228:       scall = MAT_INITIAL_MATRIX;
229:       PetscCall(PetscInfo(da, "Ensemble size changed (old: %" PetscInt_FMT ", new: %" PetscInt_FMT "), reallocating T matrix and factors\n", t_rows, m));
230:     } else {
231:       scall = MAT_REUSE_MATRIX;
232:     }
233:   }

235:   /* 3. Compute T = I + S^T * S */
236:   /*
237:      MatTransposeMatMult computes C = A^T * B (here C = S^T * S).
238:      When using MAT_REUSE_MATRIX, the existing C is overwritten with the new result.
239:   */
240:   PetscCall(MatTransposeMatMult(S, S, scall, PETSC_DEFAULT, &en->I_StS));

242:   /* Add Identity: T = (1/rho)I + S^T*S */
243:   PetscCall(MatShift(en->I_StS, 1.0 / en->inflation));

245:   /* 4. Compute Factorization based on strategy */
246:   switch (en->sqrt_type) {
247:   case PETSCDA_SQRT_CHOLESKY:
248:     PetscCall(PetscDAEnsembleTFactor_Cholesky(da));
249:     break;
250:   case PETSCDA_SQRT_EIGEN:
251:     PetscCall(PetscDAEnsembleTFactor_Eigen(da));
252:     break;
253:   default:
254:     SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscDA square-root type %" PetscInt_FMT, (PetscInt)en->sqrt_type);
255:   }
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: /*
260:   ApplyTInverse_Cholesky - Helper for Cholesky solver path
261: */
262: static PetscErrorCode ApplyTInverse_Cholesky(PetscDA da, Vec sdel, Vec w)
263: {
264:   PetscDA_Ensemble  *en = (PetscDA_Ensemble *)da->data;
265:   PetscBLASInt       n, lda, nrhs, info;
266:   const PetscScalar *a_array;
267:   PetscScalar       *b_array;
268:   PetscInt           m_L, N_L;

270:   PetscFunctionBegin;
271:   PetscCheck(en->L_cholesky, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Cholesky factor not computed");

273:   /* Get dimensions */
274:   PetscCall(MatGetSize(en->L_cholesky, &m_L, &N_L));
275:   PetscCall(PetscBLASIntCast(N_L, &n));
276:   lda  = n;
277:   nrhs = 1;

279:   /* Copy sdel to w for in-place solve */
280:   PetscCall(VecCopy(sdel, w));

282:   /* Get arrays */
283:   PetscCall(MatDenseGetArrayRead(en->L_cholesky, &a_array));
284:   PetscCall(VecGetArray(w, &b_array));

286:   /* Solve L * L^T * w = sdel using LAPACK's Cholesky solve (xPOTRS) */
287:   /* Note: POTRS expects the input B (w) to contain the RHS, and overwrites it with the solution */
288:   PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &n, &nrhs, a_array, &lda, b_array, &n, &info));
289:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK Cholesky solve (xPOTRS): info=%" PetscInt_FMT, (PetscInt)info);

291:   /* Restore arrays */
292:   PetscCall(MatDenseRestoreArrayRead(en->L_cholesky, &a_array));
293:   PetscCall(VecRestoreArray(w, &b_array));
294:   PetscFunctionReturn(PETSC_SUCCESS);
295: }

297: /*
298:   ApplyTInverse_Eigen - Helper for Eigendecomposition solver path
299: */
300: static PetscErrorCode ApplyTInverse_Eigen(PetscDA da, Vec sdel, Vec w)
301: {
302:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
303:   Vec               temp;

305:   PetscFunctionBegin;
306:   PetscCheck(en->V, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvectors not computed");
307:   PetscCheck(en->sqrt_eigen_vals, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvalues not computed");

309:   /* Allocate temporary vector for projection */
310:   PetscCall(VecDuplicate(sdel, &temp));

312:   /* 1. Project onto eigenvectors: temp = V^T * sdel */
313:   PetscCall(MatMultTranspose(en->V, sdel, temp));

315:   /* 2. Scale by inverse eigenvalues: temp = D^{-1} * temp */
316:   /* We store sqrt(D), so divide twice: temp = (temp / sqrt(D)) / sqrt(D) */
317:   PetscCall(VecPointwiseDivide(temp, temp, en->sqrt_eigen_vals));
318:   PetscCall(VecPointwiseDivide(temp, temp, en->sqrt_eigen_vals));

320:   /* 3. Map back to standard basis: w = V * temp */
321:   PetscCall(MatMult(en->V, temp, w));

323:   PetscCall(VecDestroy(&temp));
324:   PetscFunctionReturn(PETSC_SUCCESS);
325: }

327: /*@
328:   PetscDAEnsembleApplyTInverse - Apply T^{-1} to a vector [Alg 6.4 line 8]

330:   Collective

332:   Input Parameters:
333: + da   - the `PetscDA` context
334: - sdel - input vector S^T-delta

336:   Output Parameter:
337: . w - output vector w = T^{-1} * sdel

339:   Notes:
340:   This function applies the inverse of T = I + S^T S using the stored
341:   factorization. For CHOLESKY mode, it uses triangular solves. For EIGEN mode,
342:   it uses the eigendecomposition (T^{-1} = V D^{-1} V^T).

344:   Level: advanced

346: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleTFactor()`, `PetscDAEnsembleApplySqrtTInverse()`
347: @*/
348: PetscErrorCode PetscDAEnsembleApplyTInverse(PetscDA da, Vec sdel, Vec w)
349: {
350:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

352:   PetscFunctionBegin;

357:   PetscCheck(en->I_StS, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "T matrix not factored. Call PetscDAEnsembleTFactor first");

359:   switch (en->sqrt_type) {
360:   case PETSCDA_SQRT_CHOLESKY:
361:     PetscCall(ApplyTInverse_Cholesky(da, sdel, w));
362:     break;
363:   case PETSCDA_SQRT_EIGEN:
364:     PetscCall(ApplyTInverse_Eigen(da, sdel, w));
365:     break;
366:   default:
367:     SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscDA square-root type %" PetscInt_FMT, (PetscInt)en->sqrt_type);
368:   }
369:   PetscFunctionReturn(PETSC_SUCCESS);
370: }

372: /*
373:   ApplySqrtTInverse_Cholesky - Computes Y = L^{-T} * U using Cholesky factorization

375:   Notes:
376:   For T = L * L^T (Cholesky factorization), this computes the ASYMMETRIC square root
377:   T^{-1/2} = L^{-T} (upper triangular).

379:   This satisfies the product property:
380:     T^{-1/2} * (T^{-1/2})^T = L^{-T} * L^{-1} = (L * L^T)^{-1} = T^{-1}

382:   WARNING: L^{-T} is upper triangular and NOT symmetric. This is valid for ETKF where
383:   the global ensemble transform W = X_a * T^{-1/2} does not require symmetry. However,
384:   LETKF requires a SYMMETRIC square root T^{-1/2} = V * D^{-1/2} * V^T for the local
385:   ensemble perturbation update. Use PETSCDA_SQRT_EIGEN for LETKF.

387:   This requires solving L^T * Y = U for Y.
388: */
389: static PetscErrorCode ApplySqrtTInverse_Cholesky(PetscDA da, Mat U, Mat Y)
390: {
391:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

393:   PetscBLASInt       n, lda, nrhs, info;
394:   const PetscScalar *l_array;
395:   PetscScalar       *y_array;
396:   PetscInt           m_L, N_L, m_U, N_U;
397:   Mat                U_identity = NULL;

399:   PetscFunctionBegin;
400:   PetscCheck(en->L_cholesky, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Cholesky factor not computed");

402:   PetscCall(MatGetSize(en->L_cholesky, &m_L, &N_L));

404:   /* Handle NULL U (identity matrix case) */
405:   if (!U) {
406:     /* Create identity matrix of size m_L x m_L */
407:     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)en->L_cholesky), PETSC_DECIDE, PETSC_DECIDE, m_L, m_L, NULL, &U_identity));
408:     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)U_identity, "dense_"));
409:     PetscCall(MatSetFromOptions(U_identity));
410:     PetscCall(MatSetUp(U_identity));
411:     PetscCall(MatShift(U_identity, 1.0)); /* Set diagonal to 1 */
412:     U = U_identity;
413:   }

415:   PetscCall(MatGetSize(U, &m_U, &N_U));
416:   PetscCheck(m_L == m_U, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "Cholesky factor rows (%" PetscInt_FMT ") must match U rows (%" PetscInt_FMT ")", m_L, m_U);

418:   PetscCall(PetscBLASIntCast(N_L, &n));
419:   PetscCall(PetscBLASIntCast(N_U, &nrhs));
420:   lda = n;

422:   /* Initialize Y with U for in-place solve */
423:   PetscCall(MatCopy(U, Y, SAME_NONZERO_PATTERN));

425:   /* Get direct array access */
426:   PetscCall(MatDenseGetArrayRead(en->L_cholesky, &l_array));
427:   PetscCall(MatDenseGetArrayWrite(Y, &y_array));

429:   /* Solve L^T * Y = U using LAPACK triangular solve (L is lower, so L^T is upper)
430:      TRTRS args: UPLO='L', TRANS='T', DIAG='N' */
431:   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("L", "T", "N", &n, &nrhs, (PetscScalar *)l_array, &lda, y_array, &n, &info));
432:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK triangular solve (xTRTRS): info=%" PetscInt_FMT, (PetscInt)info);

434:   /* Restore arrays */
435:   PetscCall(MatDenseRestoreArrayRead(en->L_cholesky, &l_array));
436:   PetscCall(MatDenseRestoreArrayWrite(Y, &y_array));

438:   /* Cleanup temporary identity matrix if created */
439:   if (U_identity) PetscCall(MatDestroy(&U_identity));
440:   PetscFunctionReturn(PETSC_SUCCESS);
441: }

443: /*
444:   ApplySqrtTInverse_Eigen - Computes Y = V * D^{-1/2} * V^T * U.

446:   Notes:
447:   This computes the symmetric square root T^{-1/2} = V * D^{-1/2} * V^T.
448:   The operation is performed as Y = V * (D^{-1/2} * (V^T * U)) to strictly follow
449:   linear algebra operations for general matrix U.
450: */
451: static PetscErrorCode ApplySqrtTInverse_Eigen(PetscDA da, Mat U, Mat Y)
452: {
453:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
454:   Mat               W;
455:   Vec               diag_inv;

457:   PetscFunctionBegin;
458:   PetscCheck(en->V, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvectors not computed");
459:   PetscCheck(en->sqrt_eigen_vals, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvalues not computed");

461:   /* Prepare inverse sqrt eigenvalues: D^{-1/2}
462:      Note: en->sqrt_eigen_vals currently stores sqrt(D) */
463:   PetscCall(VecDuplicate(en->sqrt_eigen_vals, &diag_inv));
464:   PetscCall(VecCopy(en->sqrt_eigen_vals, diag_inv));
465:   PetscCall(VecReciprocal(diag_inv)); /* Now diag_inv contains 1/sqrt(D) = D^{-1/2} */

467:   if (U) {
468:     /* General case: Compute Y = V * D^{-1/2} * V^T * U */
469:     /* Step 1: Compute W = V^T * U (Project U onto eigenbasis) */
470:     PetscCall(MatTransposeMatMult(en->V, U, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &W));

472:     /* Step 2: Scale rows of W by D^{-1/2}: W <- D^{-1/2} * W */
473:     PetscCall(MatDiagonalScale(W, diag_inv, NULL));

475:     /* Step 3: Compute Y = V * W (Project back to standard basis)
476:        Y = V * (D^{-1/2} * V^T * U) */
477:     {
478:       Mat Y_temp;
479:       PetscCall(MatMatMult(en->V, W, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y_temp));
480:       PetscCall(MatCopy(Y_temp, Y, SAME_NONZERO_PATTERN));
481:       PetscCall(MatDestroy(&Y_temp));
482:     }

484:     /* Cleanup */
485:     PetscCall(MatDestroy(&W));
486:   } else {
487:     /* U is NULL (identity): Compute Y = V * D^{-1/2} * V^T directly */
488:     /* Step 1: Compute W = V * D^{-1/2} (scale columns of V) */
489:     PetscCall(MatDuplicate(en->V, MAT_COPY_VALUES, &W));
490:     PetscCall(MatDiagonalScale(W, NULL, diag_inv));

492:     /* Step 2: Compute Y = W * V^T = V * D^{-1/2} * V^T */
493:     {
494:       Mat Y_temp;
495:       PetscCall(MatMatTransposeMult(W, en->V, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y_temp));
496:       PetscCall(MatCopy(Y_temp, Y, SAME_NONZERO_PATTERN));
497:       PetscCall(MatDestroy(&Y_temp));
498:     }

500:     /* Cleanup */
501:     PetscCall(MatDestroy(&W));
502:   }

504:   PetscCall(VecDestroy(&diag_inv));
505:   PetscFunctionReturn(PETSC_SUCCESS);
506: }

508: /*@
509:   PetscDAEnsembleApplySqrtTInverse - Apply T^{-1/2} to a matrix U [Alg 6.4 line 9]

511:   Collective

513:   Input Parameters:
514: + da - the `PetscDA` context
515: - U  - input matrix (usually Identity, but can be general)

517:   Output Parameter:
518: . Y - output matrix Y = T^{-1/2} * U

520:   Notes:
521:   This function applies the inverse square root of T = I + S^T * S using the
522:   stored factorization.

524:   - For CHOLESKY mode: Computes Y = L^{-T} U
525:   - For EIGEN mode: Computes Y = V D^{-1/2} V^T U

527:   Both results satisfy Y^T * T * Y = U^T * U, preserving the metric.

529:   Level: advanced

531: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleTFactor()`, `PetscDAEnsembleApplyTInverse()`
532: @*/
533: PetscErrorCode PetscDAEnsembleApplySqrtTInverse(PetscDA da, Mat U, Mat Y)
534: {
535:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

537:   PetscFunctionBegin;

542:   PetscCheck(en->I_StS, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "I_StS matrix not created. Call PetscDAEnsembleTFactor first");

544:   switch (en->sqrt_type) {
545:   case PETSCDA_SQRT_CHOLESKY:
546:     PetscCall(ApplySqrtTInverse_Cholesky(da, U, Y));
547:     break;
548:   case PETSCDA_SQRT_EIGEN:
549:     PetscCall(ApplySqrtTInverse_Eigen(da, U, Y));
550:     break;
551:   default:
552:     SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscDA square-root type %" PetscInt_FMT, (PetscInt)en->sqrt_type);
553:   }

555:   /* Debugging verification: Check that metric is preserved
556:      Verify that Y^T * T * Y = U^T * U (or Y^T * T * Y = I if U is NULL) */
557:   if (PetscDefined(USE_DEBUG) && U) {
558:     Mat       YtTY, UtU, T_Y;
559:     PetscReal norm_ref, norm_diff;

561:     /* Compute LHS: Y^T * T * Y */
562:     PetscCall(MatMatMult(en->I_StS, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &T_Y));     /* T * Y */
563:     PetscCall(MatTransposeMatMult(Y, T_Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &YtTY)); /* Y^T * (T * Y) */

565:     /* Compute RHS: U^T * U */
566:     PetscCall(MatTransposeMatMult(U, U, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &UtU));

568:     /* Compute difference: Diff = LHS - RHS */
569:     PetscCall(MatAXPY(YtTY, -1.0, UtU, SAME_NONZERO_PATTERN));

571:     /* Check norms */
572:     PetscCall(MatNorm(UtU, NORM_FROBENIUS, &norm_ref));
573:     PetscCall(MatNorm(YtTY, NORM_FROBENIUS, &norm_diff));

575:     if (norm_ref > 0.0) PetscCheck(norm_diff / norm_ref < MATRIX_SQRT_TOLERANCE_FACTOR, PETSC_COMM_SELF, PETSC_ERR_PLIB, "T^{-1/2} verification failed. ||Y^T*T*Y - U^T*U||/||U^T*U|| = %g", (double)(norm_diff / norm_ref));

577:     /* Cleanup debug matrices */
578:     PetscCall(MatDestroy(&T_Y));
579:     PetscCall(MatDestroy(&YtTY));
580:     PetscCall(MatDestroy(&UtU));
581:   }
582:   PetscFunctionReturn(PETSC_SUCCESS);
583: }

585: /*@
586:   PetscDAEnsembleSetSqrtType - Selects the reduced-space square-root algorithm used during analysis.

588:   Logically Collective

590:   Input Parameters:
591: + da   - the `PetscDA` object
592: - type - either `PETSCDA_SQRT_CHOLESKY` or `PETSCDA_SQRT_EIGEN`

594:   Options Database Key:
595: . -petscda_ensemble_sqrt_type <cholesky or eigen> - set the `PetscDASqrtType`

597:   Level: advanced

599: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDASqrtType`, `PetscDAEnsembleGetSqrtType()`
600: @*/
601: PetscErrorCode PetscDAEnsembleSetSqrtType(PetscDA da, PetscDASqrtType type)
602: {
603:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

605:   PetscFunctionBegin;
607:   PetscCheck(type == PETSCDA_SQRT_CHOLESKY || type == PETSCDA_SQRT_EIGEN, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Invalid PetscDA square-root type %" PetscInt_FMT, (PetscInt)type);

609:   en->sqrt_type = type;
610:   PetscFunctionReturn(PETSC_SUCCESS);
611: }

613: /*@
614:   PetscDAEnsembleGetSqrtType - Retrieves the current square-root implementation configured for analysis.

616:   Not Collective

618:   Input Parameters:
619: . da - the `PetscDA` object

621:   Output Parameter:
622: . type - on output, the configured `PetscDASqrtType`

624:   Level: advanced

626: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleSetSqrtType()`
627: @*/
628: PetscErrorCode PetscDAEnsembleGetSqrtType(PetscDA da, PetscDASqrtType *type)
629: {
630:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

632:   PetscFunctionBegin;
634:   PetscAssertPointer(type, 2);
635:   *type = en->sqrt_type;
636:   PetscFunctionReturn(PETSC_SUCCESS);
637: }

639: /*@
640:   PetscDAEnsembleSetInflation - Sets the inflation factor for the data assimilation method.

642:   Logically Collective

644:   Input Parameters:
645: + da        - the `PetscDA` context
646: - inflation - the inflation factor (must be >= 1.0)

648:   Level: intermediate

650: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleGetInflation()`
651: @*/
652: PetscErrorCode PetscDAEnsembleSetInflation(PetscDA da, PetscReal inflation)
653: {
654:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

656:   PetscFunctionBegin;
659:   PetscCheck(inflation >= 1.0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Inflation factor must be >= 1.0, got %g", (double)inflation);
660:   en->inflation = inflation;
661:   PetscFunctionReturn(PETSC_SUCCESS);
662: }

664: /*@
665:   PetscDAEnsembleGetInflation - Gets the inflation factor for the data assimilation method.

667:   Not Collective

669:   Input Parameter:
670: . da - the `PetscDA` context

672:   Output Parameter:
673: . inflation - the inflation factor

675:   Level: intermediate

677: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleSetInflation()`
678: @*/
679: PetscErrorCode PetscDAEnsembleGetInflation(PetscDA da, PetscReal *inflation)
680: {
681:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

683:   PetscFunctionBegin;
685:   PetscAssertPointer(inflation, 2);
686:   *inflation = en->inflation;
687:   PetscFunctionReturn(PETSC_SUCCESS);
688: }

690: /*@
691:   PetscDAEnsembleGetMember - Returns a read-only view of an ensemble member stored in the `PetscDA`.

693:   Collective

695:   Input Parameters:
696: + da         - the `PetscDA` context
697: - member_idx - index of the requested member (0 <= idx < ensemble_size)

699:   Output Parameter:
700: . member - read-only vector view; call `PetscDAEnsembleRestoreMember()` when done

702:   Level: intermediate

704: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleRestoreMember()`, `PetscDAEnsembleSetMember()`
705: @*/
706: PetscErrorCode PetscDAEnsembleGetMember(PetscDA da, PetscInt member_idx, Vec *member)
707: {
708:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

710:   PetscFunctionBegin;
712:   PetscAssertPointer(member, 3);
713:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before accessing ensemble members");
714:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

716:   PetscCall(MatDenseGetColumnVecRead(en->ensemble, member_idx, member));
717:   PetscFunctionReturn(PETSC_SUCCESS);
718: }

720: /*@
721:   PetscDAEnsembleRestoreMember - Returns a column view obtained with `PetscDAEnsembleGetMember()`.

723:   Collective

725:   Input Parameters:
726: + da         - the `PetscDA` context
727: . member_idx - index that was previously requested
728: - member     - location that holds the view to restore

730:   Level: intermediate

732: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleGetMember()`
733: @*/
734: PetscErrorCode PetscDAEnsembleRestoreMember(PetscDA da, PetscInt member_idx, Vec *member)
735: {
736:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

738:   PetscFunctionBegin;
740:   PetscAssertPointer(member, 3);
741:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

743:   PetscCall(MatDenseRestoreColumnVecRead(en->ensemble, member_idx, member));
744:   PetscFunctionReturn(PETSC_SUCCESS);
745: }

747: /*@
748:   PetscDAEnsembleSetMember - Overwrites an ensemble member with user-provided state data.

750:   Collective

752:   Input Parameters:
753: + da         - the `PetscDA` context
754: . member_idx - index of the entry to modify
755: - member     - vector containing the new state values

757:   Level: intermediate

759: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleGetMember()`
760: @*/
761: PetscErrorCode PetscDAEnsembleSetMember(PetscDA da, PetscInt member_idx, Vec member)
762: {
763:   Vec               col;
764:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

766:   PetscFunctionBegin;
769:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before setting ensemble members");
770:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

772:   PetscCall(MatDenseGetColumnVecWrite(en->ensemble, member_idx, &col));
773:   PetscCall(VecCopy(member, col));
774:   PetscCall(MatDenseRestoreColumnVecWrite(en->ensemble, member_idx, &col));
775:   PetscFunctionReturn(PETSC_SUCCESS);
776: }

778: /*@
779:   PetscDAEnsembleComputeMean - Computes ensemble mean for a `PetscDA`

781:   Collective

783:   Input Parameter:
784: . da - the `PetscDA` context

786:   Output Parameter:
787: . mean - vector that will hold the ensemble mean

789:   Level: intermediate

791: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleComputeAnomalies()`
792: @*/
793: PetscErrorCode PetscDAEnsembleComputeMean(PetscDA da, Vec mean)
794: {
795:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
796:   PetscScalar       inv_m;
797:   PetscInt          m;

799:   PetscFunctionBegin;
802:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before computing the ensemble mean");
803:   PetscCheck(en->size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Ensemble size must be positive");

805:   m     = en->size;
806:   inv_m = 1.0 / (PetscScalar)m;
807:   PetscCall(MatGetRowSum(en->ensemble, mean));
808:   PetscCall(VecScale(mean, inv_m));
809:   PetscFunctionReturn(PETSC_SUCCESS);
810: }

812: /*@
813:   PetscDAEnsembleInitialize - Initialize ensemble members with Gaussian perturbations

815:   Input Parameters:
816: + da            - PetscDA context
817: . x0            - Background state
818: . obs_error_std - Standard deviation for perturbations
819: - rng           - Random number generator

821:   Level: beginner

823:   Notes:
824:   Each ensemble member is initialized as x0 + Gaussian(0, obs_error_std)

826: .seealso: [](ch_da), `PETSCDAETKF`, `PETSCDALETKF`, `PetscDA`
827: @*/
828: PetscErrorCode PetscDAEnsembleInitialize(PetscDA da, Vec x0, PetscReal obs_error_std, PetscRandom rng)
829: {
830:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

832:   Vec       member, col, x_mean;
833:   PetscInt  i;
834:   PetscReal scale;

836:   PetscFunctionBegin;
840:   PetscCall(VecDuplicate(x0, &member));
841:   PetscCall(VecDuplicate(x0, &x_mean));

843:   /*
844:      Scale factor to maintain consistent ensemble spread across different ensemble sizes.
845:      After removing the sample mean, the ensemble variance is approximately:
846:        Var_final ~= Var_initial * (m-1)/m
847:      To maintain consistent initial spread regardless of m, we scale by sqrt(m/(m-1)).
848:      This ensures the final ensemble spread is approximately obs_error_std^2. */
849:   scale = PetscSqrtReal((PetscReal)en->size / (PetscReal)(en->size - 1));

851:   /* Populate the Gaussian draws with scaled standard deviation */
852:   for (i = 0; i < en->size; i++) {
853:     PetscCall(VecSetRandomGaussian(member, rng, 0.0, obs_error_std * scale));
854:     PetscCall(PetscDAEnsembleSetMember(da, i, member));
855:   }
856:   /* get mean of perturbations */
857:   PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
858:   /* remove mean and add x0 */
859:   for (i = 0; i < en->size; i++) {
860:     PetscCall(MatDenseGetColumnVecWrite(en->ensemble, i, &col));
861:     PetscCall(VecAXPY(col, -1.0, x_mean));
862:     PetscCall(VecAXPY(col, 1.0, x0));
863:     PetscCall(MatDenseRestoreColumnVecWrite(en->ensemble, i, &col));
864:   }

866:   PetscCall(VecDestroy(&member));
867:   PetscCall(VecDestroy(&x_mean));
868:   PetscFunctionReturn(PETSC_SUCCESS);
869: }

871: /*@
872:   PetscDAEnsembleComputeAnomalies - Forms the state-space anomalies matrix for a `PetscDA`.

874:   Collective

876:   Input Parameters:
877: + da      - the `PetscDA` context
878: - mean_in - optional mean state vector (pass `NULL` to compute internally)

880:   Output Parameter:
881: . anomalies_out - location to store the newly created anomalies matrix

883:   Notes:
884:   If `mean` is `NULL`, the function will create a temporary vector and compute
885:   the ensemble mean using `PetscDAEnsembleComputeMean()`. If `mean` is provided,
886:   it will be used directly, which can improve performance when the mean has
887:   already been computed.

889:   Level: intermediate

891: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleComputeMean()`
892: @*/
893: PetscErrorCode PetscDAEnsembleComputeAnomalies(PetscDA da, Vec mean_in, Mat *anomalies_out)
894: {
895:   PetscDA_Ensemble *en   = (PetscDA_Ensemble *)da->data;
896:   Vec               mean = NULL;
897:   Vec               col_in, col_out;
898:   Mat               anomalies;
899:   MPI_Comm          comm;
900:   PetscReal         scale;
901:   PetscInt          ensemble_size;
902:   PetscInt          j;
903:   PetscBool         mean_created = PETSC_FALSE;

905:   PetscFunctionBegin;
908:   PetscAssertPointer(anomalies_out, 3);
909:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before computing anomalies");
910:   PetscCheck(en->size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be at least 2 to form anomalies");
911:   PetscCheck(da->state_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "State size must be positive");

913:   /* Cache frequently-used values for clarity and efficiency */
914:   ensemble_size = en->size;
915:   comm          = PetscObjectComm((PetscObject)en->ensemble);

917:   /*
918:     Compute normalization scale for anomalies.
919:     Alg 6.4 line 2: anomalies are normalized by 1/sqrt(m-1) so that
920:     the anomalies matrix X satisfies X*X^T = ensemble covariance matrix.
921:     This ensures proper statistical properties for ensemble-based methods.
922:   */
923:   scale = 1.0 / PetscSqrtReal((PetscReal)(ensemble_size - 1));

925:   /* Allocate anomalies matrix (state_size x ensemble_size) */
926:   PetscCall(MatCreateDense(comm, da->local_state_size, PETSC_DECIDE, da->state_size, ensemble_size, NULL, &anomalies));
927:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)anomalies, "dense_"));
928:   PetscCall(MatSetFromOptions(anomalies));
929:   PetscCall(MatSetUp(anomalies));

931:   /* Use provided mean or create and compute it */
932:   if (mean_in) {
933:     mean = mean_in;
934:   } else {
935:     /* Create and compute ensemble mean vector */
936:     PetscCall(MatCreateVecs(anomalies, NULL, &mean));
937:     PetscCall(VecSetFromOptions(mean));
938:     mean_created = PETSC_TRUE;

940:     /* Alg 6.4 line 1: \bar{x} = (1/m)\sum_j x^{(j)} */
941:     PetscCall(PetscDAEnsembleComputeMean(da, mean));
942:   }

944:   /*
945:     Form anomalies by subtracting mean from each ensemble member and scaling.
946:     For each column j: anomaly_j = (ensemble_j - mean) / sqrt(m-1)
947:   */
948:   for (j = 0; j < ensemble_size; ++j) {
949:     PetscCall(MatDenseGetColumnVecRead(en->ensemble, j, &col_in));
950:     PetscCall(MatDenseGetColumnVecWrite(anomalies, j, &col_out));

952:     /* Alg 6.4 line 2: subtract the mean column-wise to form x^{(j)} - \bar{x} */
953:     PetscCall(VecWAXPY(col_out, -1.0, mean, col_in));
954:     /* Alg 6.4 line 2: scale anomalies by 1/\sqrt{m-1} */
955:     PetscCall(VecScale(col_out, scale));

957:     PetscCall(MatDenseRestoreColumnVecWrite(anomalies, j, &col_out));
958:     PetscCall(MatDenseRestoreColumnVecRead(en->ensemble, j, &col_in));
959:   }
960:   /* Transfer ownership to output and clean up temporary resources */
961:   *anomalies_out = anomalies;
962:   if (mean_created) PetscCall(VecDestroy(&mean));
963:   PetscFunctionReturn(PETSC_SUCCESS);
964: }

966: /*@
967:   PetscDAEnsembleAnalysis - Executes the analysis (update) step using sparse observation matrix H

969:   Collective

971:   Input Parameters:
972: + da          - the `PetscDA` context
973: . observation - observation vector y in R^P
974: - H           - observation operator matrix (P x N), sparse AIJ format

976:   Notes:
977:   The observation matrix H maps from state space (N dimensions) to observation
978:   space (P dimensions): y = H*x + noise

980:   H must be a sparse AIJ matrix

982:   For identity observations (observe entire state), use an identity matrix for H.
983:   For partial observations, set appropriate rows and columns to observe
984:   specific state components.

986:   Level: intermediate

988: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleForecast()`, `PetscDASetObsErrorVariance()`
989: @*/
990: PetscErrorCode PetscDAEnsembleAnalysis(PetscDA da, Vec observation, Mat H)
991: {
992:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
993:   PetscInt          h_rows, h_cols;

995:   PetscFunctionBegin;
999:   PetscCheck(en->size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be > 1, got %" PetscInt_FMT, en->size);
1000:   PetscCall(MatGetSize(H, &h_rows, &h_cols));
1001:   PetscCheck(h_rows == da->obs_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "H matrix rows (%" PetscInt_FMT ") must match obs_size (%" PetscInt_FMT ")", h_rows, da->obs_size);
1002:   PetscCheck(h_cols == da->state_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "H matrix cols (%" PetscInt_FMT ") must match state_size (%" PetscInt_FMT ")", h_cols, da->state_size);
1003:   PetscCall(VecGetSize(observation, &h_rows));
1004:   PetscCheck(h_rows == da->obs_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "observation vector size (%" PetscInt_FMT ") must match obs_size (%" PetscInt_FMT ")", h_rows, da->obs_size);

1006:   PetscCall(PetscLogEventBegin(PetscDA_Analysis, (PetscObject)da, 0, 0, 0));
1007:   PetscCall((*en->analysis)(da, observation, H));
1008:   PetscCall(PetscLogEventEnd(PetscDA_Analysis, (PetscObject)da, 0, 0, 0));
1009:   PetscFunctionReturn(PETSC_SUCCESS);
1010: }

1012: /*@C
1013:   PetscDAEnsembleForecast - Advances every ensemble member through the user-supplied forecast model.

1015:   Collective

1017:   Input Parameters:
1018: + da    - the `PetscDA` context
1019: . model - routine that evaluates the model map `f(input, output; ctx)`
1020: - ctx   - optional context for `model`

1022:   Level: intermediate

1024: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAEnsembleAnalysis()`
1025: @*/
1026: PetscErrorCode PetscDAEnsembleForecast(PetscDA da, PetscErrorCode (*model)(Vec, Vec, PetscCtx), PetscCtx ctx)
1027: {
1028:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1030:   PetscFunctionBegin;
1032:   PetscCall((*en->forecast)(da, model, ctx));
1033:   PetscFunctionReturn(PETSC_SUCCESS);
1034: }

1036: PetscErrorCode PetscDAView_Ensemble(PetscDA da, PetscViewer viewer)
1037: {
1038:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
1039:   PetscBool         iascii;

1041:   PetscFunctionBegin;
1042:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1043:   if (iascii) {
1044:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Ensemble size: %" PetscInt_FMT "\n", en->size));
1045:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Assembled: %s\n", en->assembled ? "true" : "false"));
1046:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Inflation: %g\n", (double)en->inflation));
1047:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Square root type: %s\n", (en->sqrt_type == PETSCDA_SQRT_EIGEN) ? "eigen" : "cholesky"));
1048:   }
1049:   PetscFunctionReturn(PETSC_SUCCESS);
1050: }

1052: PetscErrorCode PetscDASetUp_Ensemble(PetscDA da)
1053: {
1054:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
1055:   MPI_Comm          comm;

1057:   PetscFunctionBegin;
1058:   if (en->assembled) PetscFunctionReturn(PETSC_SUCCESS);

1060:   PetscCheck(da->state_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set state size before calling PetscDASetUp()");
1061:   PetscCheck(da->obs_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set observation size before calling PetscDASetUp()");
1062:   PetscCheck(en->size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set ensemble size before calling PetscDASetUp()");

1064:   comm = PetscObjectComm((PetscObject)da);
1065:   if (!en->ensemble) {
1066:     PetscCall(MatCreateDense(comm, da->local_state_size, PETSC_DECIDE, da->state_size, en->size, NULL, &en->ensemble));
1067:     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)en->ensemble, "dense_"));
1068:     PetscCall(MatSetFromOptions(en->ensemble));
1069:     PetscCall(MatSetUp(en->ensemble));
1070:   }
1071:   en->assembled = PETSC_TRUE;
1072:   PetscFunctionReturn(PETSC_SUCCESS);
1073: }

1075: /*@
1076:   PetscDAEnsembleSetSize - Sets the ensemble dimensions used by a `PetscDA`.

1078:   Collective

1080:   Input Parameters:
1081: + da            - the `PetscDA` context
1082: - ensemble_size - number of ensemble members

1084:   Options Database Key:
1085: . -petscda_ensemble_size <size> - number of ensemble members

1087:   Level: beginner

1089:   Note:
1090:   The size must be greater than or equal to two. See the scale factor in `PetscDAEnsembleInitialize()` and `PetscDALETKFLocalAnalysis()`

1092: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDAGetSizes()`, `PetscDASetSizes()`, `PetscDASetUp()`
1093: @*/
1094: PetscErrorCode PetscDAEnsembleSetSize(PetscDA da, PetscInt ensemble_size)
1095: {
1096:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1098:   PetscFunctionBegin;
1101:   PetscCheck(!en->assembled, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Cannot change sizes after PetscDASetUp() has been called");
1102:   PetscCheck(ensemble_size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_SIZ, "Ensemble size must be at least two");
1103:   en->size = ensemble_size;
1104:   PetscFunctionReturn(PETSC_SUCCESS);
1105: }

1107: /*@
1108:   PetscDAEnsembleGetSize - Retrieves the dimension of the ensemble in a `PetscDA`.

1110:   Not Collective

1112:   Input Parameter:
1113: . da - the `PetscDA` context

1115:   Output Parameters:
1116: . ensemble_size - number of ensemble members

1118:   Level: beginner

1120: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDASetSizes()`, `PetscDAGetSizes()`
1121: @*/
1122: PetscErrorCode PetscDAEnsembleGetSize(PetscDA da, PetscInt *ensemble_size)
1123: {
1124:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1126:   PetscFunctionBegin;
1128:   PetscAssertPointer(ensemble_size, 2);
1129:   *ensemble_size = en->size;
1130:   PetscFunctionReturn(PETSC_SUCCESS);
1131: }

1133: PetscErrorCode PetscDASetFromOptions_Ensemble(PetscDA da, PetscOptionItems *PetscOptionsObjectPtr)
1134: {
1135:   PetscDA_Ensemble *en                 = (PetscDA_Ensemble *)da->data;
1136:   PetscOptionItems  PetscOptionsObject = *PetscOptionsObjectPtr;
1137:   char              sqrt_type_name[256];
1138:   PetscBool         sqrt_set = PETSC_FALSE, flg;
1139:   const char       *sqrt_default;
1140:   PetscDASqrtType   sqrt_type;
1141:   PetscReal         inflation_val = en->inflation;
1142:   PetscBool         inflation_set;
1143:   PetscInt          ensemble_size;

1145:   PetscFunctionBegin;
1146:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscDA Ensemble Options");

1148:   PetscCall(PetscOptionsReal("-petscda_ensemble_inflation", "Inflation factor", "PetscDAEnsembleSetInflation", en->inflation, &inflation_val, &inflation_set));
1149:   if (inflation_set) PetscCall(PetscDAEnsembleSetInflation(da, inflation_val));

1151:   sqrt_default = (en->sqrt_type == PETSCDA_SQRT_EIGEN) ? "eigen" : "cholesky";
1152:   PetscCall(PetscOptionsString("-petscda_ensemble_sqrt_type", "Matrix square root factorization", "PetscDASetSqrtType", sqrt_default, sqrt_type_name, sizeof(sqrt_type_name), &sqrt_set));
1153:   if (sqrt_set) {
1154:     PetscBool match_cholesky, match_eigen;
1155:     PetscCall(PetscStrcmp(sqrt_type_name, "cholesky", &match_cholesky));
1156:     PetscCall(PetscStrcmp(sqrt_type_name, "eigen", &match_eigen));
1157:     if (match_cholesky) {
1158:       sqrt_type = PETSCDA_SQRT_CHOLESKY;
1159:     } else if (match_eigen) {
1160:       sqrt_type = PETSCDA_SQRT_EIGEN;
1161:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDA square-root type \"%s\"", sqrt_type_name);
1162:     PetscCall(PetscDAEnsembleSetSqrtType(da, sqrt_type));
1163:   }
1164:   PetscCall(PetscOptionsInt("-petscda_ensemble_size", "Number of ensemble members", "PetscDAEnsembleSetSize", en->size, &ensemble_size, &flg));
1165:   if (flg) PetscCall(PetscDAEnsembleSetSize(da, ensemble_size));
1166:   PetscOptionsHeadEnd();
1167:   PetscFunctionReturn(PETSC_SUCCESS);
1168: }

1170: PetscErrorCode PetscDADestroy_Ensemble(PetscDA da)
1171: {
1172:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1174:   PetscFunctionBegin;
1175:   PetscCall(MatDestroy(&en->ensemble));
1176:   PetscCall(VecDestroy(&da->obs_error_var));
1177:   PetscCall(MatDestroy(&da->R));

1179:   /* Destroy T-matrix factorization data */
1180:   PetscCall(MatDestroy(&en->V));
1181:   PetscCall(MatDestroy(&en->L_cholesky));
1182:   PetscCall(VecDestroy(&en->sqrt_eigen_vals));
1183:   PetscCall(MatDestroy(&en->I_StS));
1184:   PetscFunctionReturn(PETSC_SUCCESS);
1185: }

1187: PetscErrorCode PetscDACreate_Ensemble(PetscDA da)
1188: {
1189:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1191:   PetscFunctionBegin;
1192:   en->size      = 0;
1193:   en->ensemble  = NULL;
1194:   en->assembled = PETSC_FALSE;
1195:   en->inflation = 1.0;

1197:   /* Initialize T-matrix factorization fields */
1198:   en->sqrt_type       = PETSCDA_SQRT_EIGEN;
1199:   en->V               = NULL;
1200:   en->L_cholesky      = NULL;
1201:   en->sqrt_eigen_vals = NULL;
1202:   en->I_StS           = NULL;
1203:   PetscFunctionReturn(PETSC_SUCCESS);
1204: }

1206: /*@
1207:   PetscDAEnsembleComputeNormalizedInnovationMatrix - Computes S = R^{-1/2}(Z - y_mean * 1')/sqrt(m-1) [Alg 6.4 line 5]

1209:   Collective

1211:   Input Parameters:
1212: + Z          - observation ensemble matrix
1213: . y_mean     - mean of observations
1214: . r_inv_sqrt - R^{-1/2}
1215: . m          - ensemble size
1216: - scale      - 1/sqrt(m-1)

1218:   Output Parameter:
1219: . S - normalized innovation matrix

1221:   Level: developer

1223: .seealso: [](ch_da), `PetscDA`, `PETSCDAETKF`, `PETSCDALETKF`, `PetscDASetSizes()`, `PetscDAGetSizes()`
1224: @*/
1225: PetscErrorCode PetscDAEnsembleComputeNormalizedInnovationMatrix(Mat Z, Vec y_mean, Vec r_inv_sqrt, PetscInt m, PetscScalar scale, Mat S)
1226: {
1227:   const PetscScalar *z_array, *y_array, *r_array;
1228:   PetscScalar       *s_array;
1229:   PetscInt           obs_size, obs_size_local, z_cols, i, j;
1230:   PetscInt           y_local_size, r_local_size;
1231:   PetscInt           lda_z, lda_s;

1233:   PetscFunctionBegin;
1240:   PetscCheck(m > 0, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size m must be positive, got %" PetscInt_FMT, m);
1241:   PetscCall(MatGetSize(Z, &obs_size, &z_cols));
1242:   PetscCall(MatGetLocalSize(Z, &obs_size_local, NULL));
1243:   PetscCheck(z_cols == m, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Matrix Z has %" PetscInt_FMT " columns but ensemble size is %" PetscInt_FMT, z_cols, m);

1245:   /* Verify vector dimensions match observation size (both global and local) */
1246:   PetscCall(VecGetLocalSize(y_mean, &y_local_size));
1247:   PetscCall(VecGetLocalSize(r_inv_sqrt, &r_local_size));
1248:   PetscCheck(y_local_size == obs_size_local, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Vector y_mean local size %" PetscInt_FMT " does not match matrix local rows %" PetscInt_FMT, y_local_size, obs_size_local);
1249:   PetscCheck(r_local_size == obs_size_local, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Vector r_inv_sqrt local size %" PetscInt_FMT " does not match matrix local rows %" PetscInt_FMT, r_local_size, obs_size_local);

1251:   /* Get direct access to arrays for performance */
1252:   PetscCall(MatDenseGetArrayRead(Z, &z_array));
1253:   PetscCall(MatDenseGetArrayWrite(S, &s_array));
1254:   PetscCall(VecGetArrayRead(y_mean, &y_array));
1255:   PetscCall(VecGetArrayRead(r_inv_sqrt, &r_array));

1257:   /* Get Leading Dimension (LDA) to handle padding/strides correctly */
1258:   PetscCall(MatDenseGetLDA(Z, &lda_z));
1259:   PetscCall(MatDenseGetLDA(S, &lda_s));

1261:   /* Compute normalized innovation: S_ij = (Z_ij - y_mean_i) * scale * r_inv_sqrt_i
1262:      Iterate column-wise (j) then row-wise (i) for optimal cache access with column-major storage */
1263:   for (j = 0; j < m; j++) {
1264:     const PetscScalar *z_col = z_array + j * lda_z;
1265:     PetscScalar       *s_col = s_array + j * lda_s;

1267:     for (i = 0; i < obs_size_local; i++) s_col[i] = (z_col[i] - y_array[i]) * scale * r_array[i];
1268:   }

1270:   /* Restore arrays */
1271:   PetscCall(VecRestoreArrayRead(r_inv_sqrt, &r_array));
1272:   PetscCall(VecRestoreArrayRead(y_mean, &y_array));
1273:   PetscCall(MatDenseRestoreArrayWrite(S, &s_array));
1274:   PetscCall(MatDenseRestoreArrayRead(Z, &z_array));
1275:   PetscFunctionReturn(PETSC_SUCCESS);
1276: }