Actual source code: daensemble.c
  1: #include <petscda.h>
  2: #include <petsc/private/daimpl.h>
  3: #include <petscblaslapack.h>
  4: #include <petsc/private/daensembleimpl.h>

  6: /*
  7:      Code that is shared by PETSCDALETKF (and any future ensemble methods).

  9: */
 10: /*  T-Matrix Factorization and Application Methods [Alg 6.4 line 7] */

/*
   Relative tolerance used by the debug-mode verification checks in this file (the
   eigendecomposition reconstruction V * D * V^T == T and the metric check on Y^T * T * Y).
   It is deliberately relaxed: the verified quantities are formed by chaining several dense
   matrix products, so floating-point error accumulates.
   A tolerance of 1e-2 (1%) is reasonable for numerical verification. */
#define MATRIX_SQRT_TOLERANCE_FACTOR 1.0e-2

 19: /*
 20:   PetscDAEnsembleTFactorFromGram - Build (or refresh) en->I_StS from a host m x m gram buffer
 21:   (column-major), shift by 1/inflation, and run the eigendecomposition.

 23:   Contract: the caller supplies gram_host = S^T S, where S already contains the 1/sqrt(m-1)
 24:   normalization. This routine adds the (1/inflation) I shift and computes the eigendecomposition,
 25:   so en->I_StS = (1/inflation) I + S^T S on return.

 27:   The matrix lives on PETSC_COMM_SELF so the caller is responsible for any cross-rank reduction
 28:   on gram_host before calling.
 29: */
 30: PETSC_INTERN PetscErrorCode PetscDAEnsembleTFactorFromGram(PetscDA da, PetscInt m, const PetscScalar *gram_host)
 31: {
 32:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
 33:   PetscScalar      *dst;

 35:   PetscFunctionBegin;
 36:   if (en->I_StS) {
 37:     PetscInt rows, cols;
 38:     PetscCall(MatGetSize(en->I_StS, &rows, &cols));
 39:     if (rows != m || cols != m) {
 40:       PetscCall(MatDestroy(&en->I_StS));
 41:       PetscCall(MatDestroy(&en->V));
 42:       PetscCall(VecDestroy(&en->sqrt_eigen_vals));
 43:     }
 44:   }
 45:   if (!en->I_StS) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, m, m, NULL, &en->I_StS));
 46:   PetscCall(MatDenseGetArrayWrite(en->I_StS, &dst));
 47:   PetscCall(PetscArraycpy(dst, gram_host, (size_t)m * m));
 48:   PetscCall(MatDenseRestoreArrayWrite(en->I_StS, &dst));
 49:   PetscCall(MatShift(en->I_StS, 1.0 / en->inflation));
 50:   PetscCall(PetscDAEnsembleTFactor_Eigen(da));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }

 54: /*
 55:   PetscDAEnsembleTFactor_Eigen - Compute the symmetric eigendecomposition of the m x m matrix
 56:   held in en->I_StS (the user pre-shifted it by 1/inflation). On return, en->V holds the
 57:   eigenvectors and en->sqrt_eigen_vals holds the eigenvalues (the elementwise sqrt is taken
 58:   later by PetscDAEnsembleApplySqrtTInverse_Eigen()).
 59: */
 60: PETSC_INTERN PetscErrorCode PetscDAEnsembleTFactor_Eigen(PetscDA da)
 61: {
 62:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
 63:   PetscBLASInt      n, lda, lwork;
 64:   PetscScalar      *a_array, *work, *eig_array;
 65:   PetscInt          m_V, N_V;
 66: #if defined(PETSC_USE_COMPLEX)
 67:   PetscReal *rwork = NULL;
 68: #endif

 70:   PetscFunctionBegin;
 71:   /* Initialize or update V matrix */
 72:   if (!en->V) PetscCall(MatDuplicate(en->I_StS, MAT_COPY_VALUES, &en->V));
 73:   else PetscCall(MatCopy(en->I_StS, en->V, SAME_NONZERO_PATTERN));

 75:   /* Initialize or update eigenvalue vector */
 76:   if (!en->sqrt_eigen_vals) PetscCall(MatCreateVecs(en->I_StS, &en->sqrt_eigen_vals, NULL));

 78:   /* Get matrix dimensions */
 79:   PetscCall(MatGetSize(en->V, &m_V, &N_V));
 80:   PetscCheck(m_V == N_V, PetscObjectComm((PetscObject)en->V), PETSC_ERR_ARG_WRONG, "Matrix must be square");
 81:   PetscCall(PetscBLASIntCast(N_V, &n));
 82:   lda = n;

 84:   /* Get arrays */
 85:   PetscCall(MatDenseGetArrayWrite(en->V, &a_array));
 86:   PetscCall(VecGetArrayWrite(en->sqrt_eigen_vals, &eig_array));

 88:   /* Query optimal workspace size */
 89:   lwork = -1;
 90:   PetscCall(PetscMalloc1(1, &work));
 91: #if defined(PETSC_USE_COMPLEX)
 92:   PetscCall(PetscMalloc1(PetscMax(1, 3 * n - 2), &rwork));
 93:   PetscCallLAPACKInfo("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, (PetscReal *)eig_array, work, &lwork, rwork, &info));
 94: #else
 95:   PetscCallLAPACKInfo("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, eig_array, work, &lwork, &info));
 96: #endif

 98:   /* Allocate workspace. LAPACK returns the optimal lwork as a double-valued integer in work[0];
 99:      wrap with PetscCeilReal before narrowing so a 1-ulp shrink (some LAPACK builds return
100:      e.g. 2591.999...) cannot under-allocate. PetscBLASIntCast then checks the int range. */
101:   PetscCall(PetscBLASIntCast((PetscInt)PetscCeilReal(PetscRealPart(work[0])), &lwork));
102:   PetscCall(PetscFree(work));
103:   PetscCall(PetscMalloc1(lwork, &work));

105:   /* Compute eigendecomposition */
106: #if defined(PETSC_USE_COMPLEX)
107:   PetscCallLAPACKInfo("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, (PetscReal *)eig_array, work, &lwork, rwork, &info));
108:   PetscCall(PetscFree(rwork));
109: #else
110:   PetscCallLAPACKInfo("LAPACKsyev", LAPACKsyev_("V", "U", &n, a_array, &lda, eig_array, work, &lwork, &info));
111: #endif

113:   /* Cleanup */
114:   PetscCall(PetscFree(work));
115:   PetscCall(VecRestoreArrayWrite(en->sqrt_eigen_vals, &eig_array));
116:   PetscCall(MatDenseRestoreArrayWrite(en->V, &a_array));

118:   /* T = (1/rho)*I + S^T*S is SPD by construction (rho > 0, S^T*S is PSD), so a strongly negative
119:      eigenvalue means the decomposition went wrong upstream. Catch in debug builds before
120:      VecSqrtAbs() rewrites the sign and the analysis silently uses garbage T^{-1/2}. The tolerance
121:      is sqrt(eps_machine)*||T||_F so the test scales with both working precision and problem
122:      magnitude; this is far tighter than MATRIX_SQRT_TOLERANCE_FACTOR (used downstream for
123:      matrix-reconstruction verification) because we are checking a sign error, not the
124:      accuracy of an O(eps)-noisy reconstruction. */
125:   if (PetscDefined(USE_DEBUG)) {
126:     PetscReal lambda_min, norm_T, tol;

128:     PetscCall(VecMin(en->sqrt_eigen_vals, NULL, &lambda_min));
129:     PetscCall(MatNorm(en->I_StS, NORM_FROBENIUS, &norm_T));
130:     tol = PetscSqrtReal(PETSC_MACHINE_EPSILON) * norm_T;
131:     PetscCheck(lambda_min >= -tol, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T = (1/rho)I + S^T*S has eigenvalue %g; expected >= -%g (sqrt(eps)*||T||, ||T|| = %g)", (double)lambda_min, (double)tol, (double)norm_T);
132:   }

134:   /* Compute sqrt(eigenvalues) */
135:   PetscCall(VecSqrtAbs(en->sqrt_eigen_vals));

137:   /* Debug verification: Ensure V * D * V^T == T */
138:   if (PetscDefined(USE_DEBUG)) {
139:     PetscReal norm_T, norm_diff, relative_error;
140:     Mat       V_D, VDVt;

142:     /* Compute D * V^T by scaling rows */
143:     PetscCall(MatDuplicate(en->V, MAT_COPY_VALUES, &V_D));

145:     /* Restore D for verification (since sqrt_eigen_vals currently holds sqrt(D)) */
146:     PetscCall(VecPointwiseMult(en->sqrt_eigen_vals, en->sqrt_eigen_vals, en->sqrt_eigen_vals));

148:     PetscCall(MatDiagonalScale(V_D, NULL, en->sqrt_eigen_vals));

150:     /* Compute V * D * V^T */
151:     PetscCall(MatMatTransposeMult(V_D, en->V, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &VDVt));

153:     /* Compute ||V*D*V^T - T|| / ||T|| */
154:     PetscCall(MatAXPY(VDVt, -1.0, en->I_StS, SAME_NONZERO_PATTERN));
155:     PetscCall(MatNorm(en->I_StS, NORM_FROBENIUS, &norm_T));
156:     PetscCall(MatNorm(VDVt, NORM_FROBENIUS, &norm_diff));

158:     PetscCheck(norm_T > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T = 0");
159:     relative_error = norm_diff / norm_T;
160:     PetscCheck(relative_error < MATRIX_SQRT_TOLERANCE_FACTOR, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "Eigendecomposition verification failed: ||V*D*V^T - T||/||T|| = %g", (double)relative_error);

162:     /* Restore sqrt(D) back to sqrt_eigen_vals */
163:     PetscCall(VecSqrtAbs(en->sqrt_eigen_vals));

165:     /* Cleanup debug matrices */
166:     PetscCall(MatDestroy(&V_D));
167:     PetscCall(MatDestroy(&VDVt));
168:   }
169:   PetscFunctionReturn(PETSC_SUCCESS);
170: }

172: /*@
173:   PetscDAEnsembleTFactor - Compute and store factorization of T matrix

175:   Collective

177:   Input Parameters:
178: + da - the `PetscDA` context
179: - S  - normalized innovation matrix (obs_size x m)

181:   Level: advanced

183:   Notes:
184:   This function computes $T = (1/\rho) I + S^T * S$ (where $\rho$ is the inflation factor set via
185:   `PetscDAEnsembleSetInflation()`) and stores its symmetric eigendecomposition, i.e. eigenvectors
186:   $V$ and eigenvalues $D$ such that $T = V * D * V^T$.

188:   The implementation uses matrix reuse (`MAT_REUSE_MATRIX`) to minimize memory allocation
189:   overhead when the ensemble size remains constant across analysis cycles.

191: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleApplyTInverse()`, `PetscDAEnsembleApplySqrtTInverse()`
192: @*/
193: PetscErrorCode PetscDAEnsembleTFactor(PetscDA da, Mat S)
194: {
195:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
196:   PetscInt          m, s_rows, s_cols;
197:   MatReuse          scall = MAT_INITIAL_MATRIX;

199:   PetscFunctionBegin;
202:   PetscCall(MatGetSize(S, &s_rows, &s_cols));
203:   m = s_cols; /* Ensemble size */
204:   PetscCheck(m > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Innovation matrix S must have positive columns, got %" PetscInt_FMT, m);
205:   PetscCheck(m == en->size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "S matrix columns (%" PetscInt_FMT ") must match ensemble size (%" PetscInt_FMT ") defined in PetscDA", m, en->size);

207:   /* 2. Manage Resource Reuse */
208:   /* Check if we can reuse the T matrix (I_StS) and dependent factors */
209:   if (en->I_StS) {
210:     PetscInt t_rows, t_cols;
211:     PetscCall(MatGetSize(en->I_StS, &t_rows, &t_cols));

213:     /* If dimensions have changed, drop the stale T/V/eigen state so the MAT_INITIAL_MATRIX
214:        initializer at declaration takes effect; otherwise switch to MAT_REUSE_MATRIX. */
215:     if (t_rows != m || t_cols != m) {
216:       PetscCall(MatDestroy(&en->I_StS));
217:       PetscCall(MatDestroy(&en->V));
218:       PetscCall(VecDestroy(&en->sqrt_eigen_vals));
219:       PetscCall(PetscInfo(da, "Ensemble size changed (old: %" PetscInt_FMT ", new: %" PetscInt_FMT "), reallocating T matrix and factors\n", t_rows, m));
220:     } else scall = MAT_REUSE_MATRIX;
221:   }

223:   /* 3. Compute T = (1/rho)I + S^T * S (the (1/rho) shift is added below). */
224:   /*
225:      MatTransposeMatMult computes C = A^T * B (here C = S^T * S).
226:      When using MAT_REUSE_MATRIX, the existing C is overwritten with the new result.
227:   */
228:   PetscCall(MatTransposeMatMult(S, S, scall, PETSC_DEFAULT, &en->I_StS));

230:   /* Add Identity: T = (1/rho)I + S^T*S */
231:   PetscCall(MatShift(en->I_StS, 1.0 / en->inflation));

233:   /* 4. Compute symmetric eigendecomposition T = V * D * V^T */
234:   PetscCall(PetscDAEnsembleTFactor_Eigen(da));
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }

238: /*
239:   ApplyTInverse_Eigen - Helper for Eigendecomposition solver path
240: */
241: static PetscErrorCode ApplyTInverse_Eigen(PetscDA da, Vec sdel, Vec w)
242: {
243:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
244:   Vec               temp;

246:   PetscFunctionBegin;
247:   PetscCheck(en->V, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvectors not computed");
248:   PetscCheck(en->sqrt_eigen_vals, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvalues not computed");

250:   /* Allocate temporary vector for projection */
251:   PetscCall(VecDuplicate(sdel, &temp));

253:   /* 1. Project onto eigenvectors: temp = V^T * sdel */
254:   PetscCall(MatMultTranspose(en->V, sdel, temp));

256:   /* 2. Scale by inverse eigenvalues: temp = D^{-1} * temp */
257:   /* We store sqrt(D), so divide twice: temp = (temp / sqrt(D)) / sqrt(D) */
258:   PetscCall(VecPointwiseDivide(temp, temp, en->sqrt_eigen_vals));
259:   PetscCall(VecPointwiseDivide(temp, temp, en->sqrt_eigen_vals));

261:   /* 3. Map back to standard basis: w = V * temp */
262:   PetscCall(MatMult(en->V, temp, w));

264:   PetscCall(VecDestroy(&temp));
265:   PetscFunctionReturn(PETSC_SUCCESS);
266: }

268: /*@
269:   PetscDAEnsembleApplyTInverse - Apply T^{-1} to a vector [Alg 6.4 line 8]

271:   Collective

273:   Input Parameters:
274: + da   - the `PetscDA` context
275: - sdel - input vector S^T-delta

277:   Output Parameter:
278: . w - output vector w = T^{-1} * sdel

280:   Level: advanced

282:   Notes:
283:   This function applies the inverse of $T = (1/\rho) I + S^T S$ (with $\rho$ the inflation factor)
284:   using the stored symmetric eigendecomposition: $T^{-1} = V D^{-1} V^T$.

286: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleTFactor()`, `PetscDAEnsembleApplySqrtTInverse()`
287: @*/
288: PetscErrorCode PetscDAEnsembleApplyTInverse(PetscDA da, Vec sdel, Vec w)
289: {
290:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

292:   PetscFunctionBegin;

297:   PetscCheck(en->I_StS, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "T matrix not factored. Call PetscDAEnsembleTFactor first");
298:   PetscCall(ApplyTInverse_Eigen(da, sdel, w));
299:   PetscFunctionReturn(PETSC_SUCCESS);
300: }

302: /*
303:   ApplySqrtTInverse_Eigen - Computes Y = V * D^{-1/2} * V^T * U.

305:   Notes:
306:   This computes the symmetric square root T^{-1/2} = V * D^{-1/2} * V^T.
307:   The operation is performed as Y = V * (D^{-1/2} * (V^T * U)) to strictly follow
308:   linear algebra operations for general matrix U.
309: */
310: static PetscErrorCode ApplySqrtTInverse_Eigen(PetscDA da, Mat U, Mat Y)
311: {
312:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
313:   Mat               W;
314:   Vec               diag_inv;

316:   PetscFunctionBegin;
317:   PetscCheck(en->V, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvectors not computed");
318:   PetscCheck(en->sqrt_eigen_vals, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "Eigenvalues not computed");

320:   /* Prepare inverse sqrt eigenvalues: D^{-1/2}
321:      Note: en->sqrt_eigen_vals currently stores sqrt(D) */
322:   PetscCall(VecDuplicate(en->sqrt_eigen_vals, &diag_inv));
323:   PetscCall(VecCopy(en->sqrt_eigen_vals, diag_inv));
324:   PetscCall(VecReciprocal(diag_inv)); /* Now diag_inv contains 1/sqrt(D) = D^{-1/2} */

326:   if (U) {
327:     /* General case: Compute Y = V * D^{-1/2} * V^T * U */
328:     /* Step 1: Compute W = V^T * U (Project U onto eigenbasis) */
329:     PetscCall(MatTransposeMatMult(en->V, U, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &W));

331:     /* Step 2: Scale rows of W by D^{-1/2}: W <- D^{-1/2} * W */
332:     PetscCall(MatDiagonalScale(W, diag_inv, NULL));

334:     /* Step 3: Compute Y = V * W (Project back to standard basis)
335:        Y = V * (D^{-1/2} * V^T * U) */
336:     {
337:       Mat Y_temp;
338:       PetscCall(MatMatMult(en->V, W, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y_temp));
339:       PetscCall(MatCopy(Y_temp, Y, SAME_NONZERO_PATTERN));
340:       PetscCall(MatDestroy(&Y_temp));
341:     }

343:     /* Cleanup */
344:     PetscCall(MatDestroy(&W));
345:   } else {
346:     /* U is NULL (identity): Compute Y = V * D^{-1/2} * V^T directly */
347:     /* Step 1: Compute W = V * D^{-1/2} (scale columns of V) */
348:     PetscCall(MatDuplicate(en->V, MAT_COPY_VALUES, &W));
349:     PetscCall(MatDiagonalScale(W, NULL, diag_inv));

351:     /* Step 2: Compute Y = W * V^T = V * D^{-1/2} * V^T */
352:     {
353:       Mat Y_temp;
354:       PetscCall(MatMatTransposeMult(W, en->V, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y_temp));
355:       PetscCall(MatCopy(Y_temp, Y, SAME_NONZERO_PATTERN));
356:       PetscCall(MatDestroy(&Y_temp));
357:     }

359:     /* Cleanup */
360:     PetscCall(MatDestroy(&W));
361:   }

363:   PetscCall(VecDestroy(&diag_inv));
364:   PetscFunctionReturn(PETSC_SUCCESS);
365: }

367: /*@
368:   PetscDAEnsembleApplySqrtTInverse - Apply T^{-1/2} to a matrix U [Alg 6.4 line 9]

370:   Collective

372:   Input Parameters:
373: + da - the `PetscDA` context
 374: - U  - input matrix, or `NULL` to use the identity (the usual case); a general matrix is also accepted

376:   Output Parameter:
377: . Y - output matrix Y = T^{-1/2} * U

379:   Level: advanced

381:   Notes:
382:   This function applies the symmetric inverse square root of $T = (1/\rho) I + S^T * S$ (with $\rho$
383:   the inflation factor) using the stored eigendecomposition: $Y = V D^{-1/2} V^T U$. The result
384:   satisfies $Y^T * T * Y = U^T * U$, preserving the metric.

386: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleTFactor()`, `PetscDAEnsembleApplyTInverse()`
387: @*/
388: PetscErrorCode PetscDAEnsembleApplySqrtTInverse(PetscDA da, Mat U, Mat Y)
389: {
390:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

392:   PetscFunctionBegin;

397:   PetscCheck(en->I_StS, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONGSTATE, "I_StS matrix not created. Call PetscDAEnsembleTFactor first");
398:   PetscCall(ApplySqrtTInverse_Eigen(da, U, Y));

400:   /* Debugging verification: Check that metric is preserved
401:      Verify that Y^T * T * Y = U^T * U (or Y^T * T * Y = I if U is NULL) */
402:   if (PetscDefined(USE_DEBUG)) {
403:     Mat       YtTY, T_Y;
404:     PetscReal norm_T, norm_diff;

406:     /* Compute LHS: Y^T * T * Y */
407:     PetscCall(MatMatMult(en->I_StS, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &T_Y));     /* T * Y */
408:     PetscCall(MatTransposeMatMult(Y, T_Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &YtTY)); /* Y^T * (T * Y) */

410:     if (U) {
411:       Mat       UtU;
412:       PetscReal norm_ref;

414:       /* Compute RHS: U^T * U and difference YtTY <- YtTY - U^T*U */
415:       PetscCall(MatTransposeMatMult(U, U, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &UtU));
416:       PetscCall(MatAXPY(YtTY, -1.0, UtU, SAME_NONZERO_PATTERN));

418:       /* Check norms. When ||U^T*U|| == 0 the relative form is undefined, so fall back to an
419:          absolute tolerance scaled by ||T|| (the only nonzero scale we have on hand) instead of
420:          silently passing on any norm_diff. */
421:       PetscCall(MatNorm(UtU, NORM_FROBENIUS, &norm_ref));
422:       PetscCall(MatNorm(YtTY, NORM_FROBENIUS, &norm_diff));
423:       if (norm_ref > 0.0) PetscCheck(norm_diff / norm_ref < MATRIX_SQRT_TOLERANCE_FACTOR, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T^{-1/2} verification failed. ||Y^T*T*Y - U^T*U||/||U^T*U|| = %g", (double)(norm_diff / norm_ref));
424:       else {
425:         PetscCall(MatNorm(en->I_StS, NORM_FROBENIUS, &norm_T));
426:         PetscCheck(norm_diff <= MATRIX_SQRT_TOLERANCE_FACTOR * norm_T, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T^{-1/2} verification failed (U^T*U is zero). ||Y^T*T*Y|| = %g, ||T|| = %g", (double)norm_diff, (double)norm_T);
427:       }
428:       PetscCall(MatDestroy(&UtU));
429:     } else {
430:       /* RHS is the identity: form YtTY - I via MatShift, then compare against ||T|| */
431:       PetscCall(MatShift(YtTY, -1.0));
432:       PetscCall(MatNorm(YtTY, NORM_FROBENIUS, &norm_diff));
433:       PetscCall(MatNorm(en->I_StS, NORM_FROBENIUS, &norm_T));
434:       PetscCheck(norm_diff <= MATRIX_SQRT_TOLERANCE_FACTOR * norm_T, PetscObjectComm((PetscObject)da), PETSC_ERR_PLIB, "T^{-1/2} verification failed (U is NULL). ||Y^T*T*Y - I|| = %g, ||T|| = %g", (double)norm_diff, (double)norm_T);
435:     }

437:     /* Cleanup debug matrices */
438:     PetscCall(MatDestroy(&T_Y));
439:     PetscCall(MatDestroy(&YtTY));
440:   }
441:   PetscFunctionReturn(PETSC_SUCCESS);
442: }

444: /*@
445:   PetscDAEnsembleSetInflation - Sets the inflation factor for the data assimilation method.

447:   Logically Collective

449:   Input Parameters:
450: + da        - the `PetscDA` context
451: - inflation - the inflation factor (must be >= 1.0)

453:   Level: intermediate

455: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleGetInflation()`
456: @*/
457: PetscErrorCode PetscDAEnsembleSetInflation(PetscDA da, PetscReal inflation)
458: {
459:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

461:   PetscFunctionBegin;
464:   PetscCheck(inflation >= 1.0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Inflation factor must be >= 1.0, got %g", (double)inflation);
465:   en->inflation = inflation;
466:   PetscFunctionReturn(PETSC_SUCCESS);
467: }

469: /*@
470:   PetscDAEnsembleGetInflation - Gets the inflation factor for the data assimilation method.

472:   Not Collective

474:   Input Parameter:
475: . da - the `PetscDA` context

477:   Output Parameter:
478: . inflation - the inflation factor

480:   Level: intermediate

482: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleSetInflation()`
483: @*/
484: PetscErrorCode PetscDAEnsembleGetInflation(PetscDA da, PetscReal *inflation)
485: {
486:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

488:   PetscFunctionBegin;
490:   PetscAssertPointer(inflation, 2);
491:   *inflation = en->inflation;
492:   PetscFunctionReturn(PETSC_SUCCESS);
493: }

495: /*@
496:   PetscDAEnsembleGetMember - Returns a read-only view of an ensemble member stored in the `PetscDA`.

498:   Collective

500:   Input Parameters:
501: + da         - the `PetscDA` context
502: - member_idx - index of the requested member (0 <= idx < ensemble_size)

504:   Output Parameter:
505: . member - read-only vector view; call `PetscDAEnsembleRestoreMember()` when done

507:   Level: intermediate

509: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleRestoreMember()`, `PetscDAEnsembleSetMember()`
510: @*/
511: PetscErrorCode PetscDAEnsembleGetMember(PetscDA da, PetscInt member_idx, Vec *member)
512: {
513:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

515:   PetscFunctionBegin;
517:   PetscAssertPointer(member, 3);
518:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before accessing ensemble members");
519:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

521:   PetscCall(MatDenseGetColumnVecRead(en->ensemble, member_idx, member));
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: /*@
526:   PetscDAEnsembleRestoreMember - Returns a column view obtained with `PetscDAEnsembleGetMember()`.

528:   Collective

530:   Input Parameters:
531: + da         - the `PetscDA` context
532: . member_idx - index that was previously requested
533: - member     - location that holds the view to restore

535:   Level: intermediate

537: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleGetMember()`
538: @*/
539: PetscErrorCode PetscDAEnsembleRestoreMember(PetscDA da, PetscInt member_idx, Vec *member)
540: {
541:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

543:   PetscFunctionBegin;
545:   PetscAssertPointer(member, 3);
546:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

548:   PetscCall(MatDenseRestoreColumnVecRead(en->ensemble, member_idx, member));
549:   PetscFunctionReturn(PETSC_SUCCESS);
550: }

552: /*@
553:   PetscDAEnsembleSetMember - Overwrites an ensemble member with user-provided state data.

555:   Collective

557:   Input Parameters:
558: + da         - the `PetscDA` context
559: . member_idx - index of the entry to modify
560: - member     - vector containing the new state values

562:   Level: intermediate

564: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleGetMember()`
565: @*/
566: PetscErrorCode PetscDAEnsembleSetMember(PetscDA da, PetscInt member_idx, Vec member)
567: {
568:   Vec               col;
569:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

571:   PetscFunctionBegin;
574:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before setting ensemble members");
575:   PetscCheck(member_idx >= 0 && member_idx < en->size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Member index %" PetscInt_FMT " out of range [0, %" PetscInt_FMT ")", member_idx, en->size);

577:   PetscCall(MatDenseGetColumnVecWrite(en->ensemble, member_idx, &col));
578:   PetscCall(VecCopy(member, col));
579:   PetscCall(MatDenseRestoreColumnVecWrite(en->ensemble, member_idx, &col));
580:   PetscFunctionReturn(PETSC_SUCCESS);
581: }

583: /*@
584:   PetscDAEnsembleComputeMean - Computes ensemble mean for a `PetscDA`

586:   Collective

588:   Input Parameter:
589: . da - the `PetscDA` context

591:   Output Parameter:
592: . mean - vector that will hold the ensemble mean

594:   Level: intermediate

596: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleComputeAnomalies()`
597: @*/
598: PetscErrorCode PetscDAEnsembleComputeMean(PetscDA da, Vec mean)
599: {
600:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
601:   PetscScalar       inv_m;
602:   PetscInt          m;

604:   PetscFunctionBegin;
607:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before computing the ensemble mean");
608:   PetscCheck(en->size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Ensemble size must be positive");

610:   m     = en->size;
611:   inv_m = 1.0 / (PetscScalar)m;
612:   PetscCall(MatGetRowSum(en->ensemble, mean));
613:   PetscCall(VecScale(mean, inv_m));
614:   PetscFunctionReturn(PETSC_SUCCESS);
615: }

617: /*@
618:   PetscDAEnsembleInitialize - Initialize ensemble members with Gaussian perturbations

620:   Collective

622:   Input Parameters:
623: + da            - PetscDA context
624: . x0            - Background state
625: . obs_error_std - Target ensemble spread (standard deviation) after sample-mean removal
626: - rng           - Random number generator

628:   Level: beginner

630:   Notes:
631:   Each member is drawn as `Gaussian(0, obs_error_std * sqrt(m / (m - 1)))` (with `m` the ensemble size),
632:   the sample mean across the ensemble is subtracted, and `x0` is added. The pre-mean-removal scale
633:   by `sqrt(m / (m - 1))` compensates for the variance reduction from centering, so the per-member
634:   spread after the subtraction is approximately `obs_error_std` regardless of `m`.

636: .seealso: [](ch_da), `PETSCDALETKF`, `PetscDA`
637: @*/
638: PetscErrorCode PetscDAEnsembleInitialize(PetscDA da, Vec x0, PetscReal obs_error_std, PetscRandom rng)
639: {
640:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
641:   Vec               member, col, x_mean;
642:   PetscReal         scale;

644:   PetscFunctionBegin;
648:   PetscCall(VecDuplicate(x0, &member));
649:   PetscCall(VecDuplicate(x0, &x_mean));

651:   /*
652:      Scale factor to maintain consistent ensemble spread across different ensemble sizes.
653:      After removing the sample mean, the ensemble variance is approximately:
654:        Var_final ~= Var_initial * (m-1)/m
655:      To maintain consistent initial spread regardless of m, we scale by sqrt(m/(m-1)).
656:      This ensures the final ensemble spread is approximately obs_error_std^2. */
657:   scale = PetscSqrtReal((PetscReal)en->size / (PetscReal)(en->size - 1));

659:   /* Populate the Gaussian draws with scaled standard deviation */
660:   for (PetscInt i = 0; i < en->size; i++) {
661:     PetscCall(VecSetRandomGaussian(member, rng, 0.0, obs_error_std * scale));
662:     PetscCall(PetscDAEnsembleSetMember(da, i, member));
663:   }
664:   /* get mean of perturbations */
665:   PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
666:   /* remove mean and add x0 */
667:   for (PetscInt i = 0; i < en->size; i++) {
668:     PetscCall(MatDenseGetColumnVecWrite(en->ensemble, i, &col));
669:     PetscCall(VecAXPY(col, -1.0, x_mean));
670:     PetscCall(VecAXPY(col, 1.0, x0));
671:     PetscCall(MatDenseRestoreColumnVecWrite(en->ensemble, i, &col));
672:   }

674:   PetscCall(VecDestroy(&member));
675:   PetscCall(VecDestroy(&x_mean));
676:   PetscFunctionReturn(PETSC_SUCCESS);
677: }

679: /*@
680:   PetscDAEnsembleComputeAnomalies - Forms the state-space anomalies matrix for a `PetscDA`.

682:   Collective

684:   Input Parameters:
685: + da      - the `PetscDA` context
686: - mean_in - optional mean state vector (pass `NULL` to compute internally)

688:   Output Parameter:
689: . anomalies_out - location to store the newly created anomalies matrix

691:   Level: intermediate

693:   Notes:
 694:   If `mean_in` is `NULL`, the function will create a temporary vector and compute
 695:   the ensemble mean using `PetscDAEnsembleComputeMean()`. If `mean_in` is provided,
 696:   it will be used directly, which can improve performance when the mean has
 697:   already been computed.

699: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleComputeMean()`
700: @*/
701: PetscErrorCode PetscDAEnsembleComputeAnomalies(PetscDA da, Vec mean_in, Mat *anomalies_out)
702: {
703:   PetscDA_Ensemble *en   = (PetscDA_Ensemble *)da->data;
704:   Vec               mean = NULL;
705:   Vec               col_in, col_out;
706:   Mat               anomalies;
707:   MPI_Comm          comm;
708:   PetscReal         scale;
709:   PetscInt          ensemble_size;
710:   PetscInt          j;
711:   PetscBool         mean_created = PETSC_FALSE;

713:   PetscFunctionBegin;
716:   PetscAssertPointer(anomalies_out, 3);
717:   PetscCheck(en->ensemble, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "PetscDASetUp() must be called before computing anomalies");
718:   PetscCheck(en->size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be at least 2 to form anomalies");
719:   PetscCheck(da->state_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "State size must be positive");

721:   /* Cache frequently-used values for clarity and efficiency */
722:   ensemble_size = en->size;
723:   comm          = PetscObjectComm((PetscObject)en->ensemble);

725:   /*
726:     Compute normalization scale for anomalies.
727:     Alg 6.4 line 2: anomalies are normalized by 1/sqrt(m-1) so that
728:     the anomalies matrix X satisfies X*X^T = ensemble covariance matrix.
729:     This ensures proper statistical properties for ensemble-based methods.
730:   */
731:   scale = 1.0 / PetscSqrtReal((PetscReal)(ensemble_size - 1));

733:   /* Allocate anomalies matrix (state_size x ensemble_size) */
734:   PetscCall(MatCreateDense(comm, da->local_state_size, PETSC_DECIDE, da->state_size, ensemble_size, NULL, &anomalies));
735:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)anomalies, "dense_"));
736:   PetscCall(MatSetFromOptions(anomalies));
737:   PetscCall(MatSetUp(anomalies));

739:   /* Use provided mean or create and compute it */
740:   if (mean_in) {
741:     mean = mean_in;
742:   } else {
743:     /* Create and compute ensemble mean vector */
744:     PetscCall(MatCreateVecs(anomalies, NULL, &mean));
745:     PetscCall(VecSetFromOptions(mean));
746:     mean_created = PETSC_TRUE;

748:     /* Alg 6.4 line 1: \bar{x} = (1/m)\sum_j x^{(j)} */
749:     PetscCall(PetscDAEnsembleComputeMean(da, mean));
750:   }

752:   /*
753:     Form anomalies by subtracting mean from each ensemble member and scaling.
754:     For each column j: anomaly_j = (ensemble_j - mean) / sqrt(m-1)
755:   */
756:   for (j = 0; j < ensemble_size; ++j) {
757:     PetscCall(MatDenseGetColumnVecRead(en->ensemble, j, &col_in));
758:     PetscCall(MatDenseGetColumnVecWrite(anomalies, j, &col_out));

760:     /* Alg 6.4 line 2: subtract the mean column-wise to form x^{(j)} - \bar{x} */
761:     PetscCall(VecWAXPY(col_out, -1.0, mean, col_in));
762:     /* Alg 6.4 line 2: scale anomalies by 1/\sqrt{m-1} */
763:     PetscCall(VecScale(col_out, scale));

765:     PetscCall(MatDenseRestoreColumnVecWrite(anomalies, j, &col_out));
766:     PetscCall(MatDenseRestoreColumnVecRead(en->ensemble, j, &col_in));
767:   }
768:   /* Transfer ownership to output and clean up temporary resources */
769:   *anomalies_out = anomalies;
770:   if (mean_created) PetscCall(VecDestroy(&mean));
771:   PetscFunctionReturn(PETSC_SUCCESS);
772: }

774: /*@
775:   PetscDAEnsembleAnalysis - Executes the analysis (update) step using sparse observation matrix H

777:   Collective

779:   Input Parameters:
780: + da          - the `PetscDA` context
781: . observation - observation vector y in R^P
782: - H           - observation operator matrix (P x N), sparse AIJ format

784:   Level: intermediate

786:   Notes:
787:   The observation matrix H maps from state space (N dimensions) to observation
788:   space (P dimensions): y = H*x + noise

790:   H must be a sparse AIJ matrix

792:   For identity observations (observe entire state), use an identity matrix for H.
793:   For partial observations, set appropriate rows and columns to observe
794:   specific state components. On return, the ensemble matrix held by `da` has
795:   been updated in place: every member has been replaced by its analysis update.
796:   Read the analysis state with `PetscDAEnsembleGetMember()` or `PetscDAEnsembleComputeMean()`.

798: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleForecast()`, `PetscDASetObsErrorVariance()`,
799:           `PetscDAEnsembleGetMember()`, `PetscDAEnsembleComputeMean()`
800: @*/
801: PetscErrorCode PetscDAEnsembleAnalysis(PetscDA da, Vec observation, Mat H)
802: {
803:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
804:   PetscInt          h_rows, h_cols;

806:   PetscFunctionBegin;
810:   PetscCheck(en->size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be > 1, got %" PetscInt_FMT, en->size);
811:   PetscCall(MatGetSize(H, &h_rows, &h_cols));
812:   PetscCheck(h_rows == da->obs_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "H matrix rows (%" PetscInt_FMT ") must match obs_size (%" PetscInt_FMT ")", h_rows, da->obs_size);
813:   PetscCheck(h_cols == da->state_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "H matrix cols (%" PetscInt_FMT ") must match state_size (%" PetscInt_FMT ")", h_cols, da->state_size);
814:   PetscCall(VecGetSize(observation, &h_rows));
815:   PetscCheck(h_rows == da->obs_size, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_INCOMP, "observation vector size (%" PetscInt_FMT ") must match obs_size (%" PetscInt_FMT ")", h_rows, da->obs_size);

817:   PetscCall(PetscLogEventBegin(PetscDA_Analysis, (PetscObject)da, 0, 0, 0));
818:   PetscCall((*en->analysis)(da, observation, H));
819:   PetscCall(PetscLogEventEnd(PetscDA_Analysis, (PetscObject)da, 0, 0, 0));
820:   PetscFunctionReturn(PETSC_SUCCESS);
821: }

823: /*@C
824:   PetscDAEnsembleForecast - Advances the entire ensemble through the user-supplied forecast model.

826:   Collective

828:   Input Parameters:
829: + da    - the `PetscDA` context
830: . model - routine that advances the ensemble matrix in place; if the model can only advance one state
831:           at a time (e.g. a `TS`-driven step), it must loop over columns itself
832: - ctx   - optional context for `model`

834:   Level: intermediate

836:   Note:
837:   The columns of the ensemble matrix are the individual members; `model` advances them in place.

839: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAEnsembleAnalysis()`
840: @*/
841: PetscErrorCode PetscDAEnsembleForecast(PetscDA da, PetscDAEnsembleForecastFn *model, PetscCtx ctx)
842: {
843:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

845:   PetscFunctionBegin;
847:   PetscCall((*en->forecast)(da, model, ctx));
848:   PetscFunctionReturn(PETSC_SUCCESS);
849: }

851: PetscErrorCode PetscDAView_Ensemble(PetscDA da, PetscViewer viewer)
852: {
853:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
854:   PetscBool         iascii;

856:   PetscFunctionBegin;
857:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
858:   if (iascii) {
859:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Ensemble size: %" PetscInt_FMT "\n", en->size));
860:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Assembled: %s\n", en->assembled ? "true" : "false"));
861:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Inflation: %g\n", (double)en->inflation));
862:   }
863:   PetscFunctionReturn(PETSC_SUCCESS);
864: }

866: PetscErrorCode PetscDASetUp_Ensemble(PetscDA da)
867: {
868:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;
869:   MPI_Comm          comm;

871:   PetscFunctionBegin;
872:   if (en->assembled) PetscFunctionReturn(PETSC_SUCCESS);

874:   PetscCheck(da->state_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set state size before calling PetscDASetUp()");
875:   PetscCheck(da->obs_size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set observation size before calling PetscDASetUp()");
876:   PetscCheck(en->size > 0, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Must set ensemble size before calling PetscDASetUp()");

878:   comm = PetscObjectComm((PetscObject)da);
879:   if (!en->ensemble) {
880:     PetscCall(MatCreateDense(comm, da->local_state_size, PETSC_DECIDE, da->state_size, en->size, NULL, &en->ensemble));
881:     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)en->ensemble, "dense_"));
882:     PetscCall(MatSetFromOptions(en->ensemble));
883:     PetscCall(MatSetUp(en->ensemble));
884:   }
885:   en->assembled = PETSC_TRUE;
886:   PetscFunctionReturn(PETSC_SUCCESS);
887: }

889: /*@
890:   PetscDAEnsembleSetSize - Sets the ensemble dimensions used by a `PetscDA`.

892:   Collective

894:   Input Parameters:
895: + da            - the `PetscDA` context
896: - ensemble_size - number of ensemble members

898:   Options Database Key:
899: . -petscda_ensemble_size size - number of ensemble members

901:   Level: beginner

903:   Note:
904:   The size must be greater than or equal to two. See the scale factor in `PetscDAEnsembleInitialize()` and `PetscDALETKFLocalAnalysis()`

906: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDAGetSizes()`, `PetscDASetSizes()`, `PetscDASetUp()`
907: @*/
908: PetscErrorCode PetscDAEnsembleSetSize(PetscDA da, PetscInt ensemble_size)
909: {
910:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

912:   PetscFunctionBegin;
915:   PetscCheck(!en->assembled, PetscObjectComm((PetscObject)da), PETSC_ERR_ORDER, "Cannot change sizes after PetscDASetUp() has been called");
916:   PetscCheck(ensemble_size > 1, PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_SIZ, "Ensemble size must be at least two");
917:   en->size = ensemble_size;
918:   PetscFunctionReturn(PETSC_SUCCESS);
919: }

921: /*@
922:   PetscDAEnsembleGetSize - Retrieves the dimension of the ensemble in a `PetscDA`.

924:   Not Collective

926:   Input Parameter:
927: . da - the `PetscDA` context

929:   Output Parameters:
930: . ensemble_size - number of ensemble members

932:   Level: beginner

934: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDASetSizes()`, `PetscDAGetSizes()`
935: @*/
936: PetscErrorCode PetscDAEnsembleGetSize(PetscDA da, PetscInt *ensemble_size)
937: {
938:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

940:   PetscFunctionBegin;
942:   PetscAssertPointer(ensemble_size, 2);
943:   *ensemble_size = en->size;
944:   PetscFunctionReturn(PETSC_SUCCESS);
945: }

947: PetscErrorCode PetscDASetFromOptions_Ensemble(PetscDA da, PetscOptionItems *PetscOptionsObjectPtr)
948: {
949:   PetscDA_Ensemble *en                 = (PetscDA_Ensemble *)da->data;
950:   PetscOptionItems  PetscOptionsObject = *PetscOptionsObjectPtr;
951:   PetscReal         inflation_val      = en->inflation;
952:   PetscBool         inflation_set, flg;
953:   PetscInt          ensemble_size;

955:   PetscFunctionBegin;
956:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscDA Ensemble Options");

958:   PetscCall(PetscOptionsReal("-petscda_ensemble_inflation", "Inflation factor", "PetscDAEnsembleSetInflation", en->inflation, &inflation_val, &inflation_set));
959:   if (inflation_set) PetscCall(PetscDAEnsembleSetInflation(da, inflation_val));

961:   PetscCall(PetscOptionsInt("-petscda_ensemble_size", "Number of ensemble members", "PetscDAEnsembleSetSize", en->size, &ensemble_size, &flg));
962:   if (flg) PetscCall(PetscDAEnsembleSetSize(da, ensemble_size));
963:   PetscOptionsHeadEnd();
964:   PetscFunctionReturn(PETSC_SUCCESS);
965: }

967: PetscErrorCode PetscDADestroy_Ensemble(PetscDA da)
968: {
969:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

971:   PetscFunctionBegin;
972:   PetscCall(MatDestroy(&en->ensemble));
973:   PetscCall(VecDestroy(&da->obs_error_var));
974:   PetscCall(MatDestroy(&da->R));

976:   /* Destroy T-matrix factorization data */
977:   PetscCall(MatDestroy(&en->V));
978:   PetscCall(VecDestroy(&en->sqrt_eigen_vals));
979:   PetscCall(MatDestroy(&en->I_StS));
980:   PetscFunctionReturn(PETSC_SUCCESS);
981: }

983: PetscErrorCode PetscDACreate_Ensemble(PetscDA da)
984: {
985:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

987:   PetscFunctionBegin;
988:   en->size      = 0;
989:   en->ensemble  = NULL;
990:   en->assembled = PETSC_FALSE;
991:   en->inflation = 1.0;

993:   /* Initialize T-matrix factorization fields */
994:   en->V               = NULL;
995:   en->sqrt_eigen_vals = NULL;
996:   en->I_StS           = NULL;
997:   PetscFunctionReturn(PETSC_SUCCESS);
998: }

1000: /*@
1001:   PetscDAEnsembleComputeNormalizedInnovationMatrix - Computes S = R^{-1/2}(Z - y_mean * 1')/sqrt(m-1) [Alg 6.4 line 5]

1003:   Collective

1005:   Input Parameters:
1006: + Z          - observation ensemble matrix
1007: . y_mean     - mean of observations
1008: . r_inv_sqrt - R^{-1/2}
1009: . m          - ensemble size
1010: - scale      - 1/sqrt(m-1)

1012:   Output Parameter:
1013: . S - normalized innovation matrix

1015:   Level: developer

1017: .seealso: [](ch_da), `PetscDA`, `PETSCDALETKF`, `PetscDASetSizes()`, `PetscDAGetSizes()`
1018: @*/
1019: PetscErrorCode PetscDAEnsembleComputeNormalizedInnovationMatrix(Mat Z, Vec y_mean, Vec r_inv_sqrt, PetscInt m, PetscScalar scale, Mat S)
1020: {
1021:   const PetscScalar *z_array, *y_array, *r_array;
1022:   PetscScalar       *s_array;
1023:   PetscInt           obs_size, obs_size_local, z_cols, i, j;
1024:   PetscInt           y_local_size, r_local_size;
1025:   PetscInt           lda_z, lda_s;

1027:   PetscFunctionBegin;
1034:   PetscCheck(m > 0, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size m must be positive, got %" PetscInt_FMT, m);
1035:   PetscCall(MatGetSize(Z, &obs_size, &z_cols));
1036:   PetscCall(MatGetLocalSize(Z, &obs_size_local, NULL));
1037:   PetscCheck(z_cols == m, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Matrix Z has %" PetscInt_FMT " columns but ensemble size is %" PetscInt_FMT, z_cols, m);

1039:   /* Verify vector dimensions match observation size (both global and local) */
1040:   PetscCall(VecGetLocalSize(y_mean, &y_local_size));
1041:   PetscCall(VecGetLocalSize(r_inv_sqrt, &r_local_size));
1042:   PetscCheck(y_local_size == obs_size_local, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Vector y_mean local size %" PetscInt_FMT " does not match matrix local rows %" PetscInt_FMT, y_local_size, obs_size_local);
1043:   PetscCheck(r_local_size == obs_size_local, PetscObjectComm((PetscObject)Z), PETSC_ERR_ARG_INCOMP, "Vector r_inv_sqrt local size %" PetscInt_FMT " does not match matrix local rows %" PetscInt_FMT, r_local_size, obs_size_local);

1045:   /* Get direct access to arrays for performance */
1046:   PetscCall(MatDenseGetArrayRead(Z, &z_array));
1047:   PetscCall(MatDenseGetArrayWrite(S, &s_array));
1048:   PetscCall(VecGetArrayRead(y_mean, &y_array));
1049:   PetscCall(VecGetArrayRead(r_inv_sqrt, &r_array));

1051:   /* Get Leading Dimension (LDA) to handle padding/strides correctly */
1052:   PetscCall(MatDenseGetLDA(Z, &lda_z));
1053:   PetscCall(MatDenseGetLDA(S, &lda_s));

1055:   /* Compute normalized innovation: S_ij = (Z_ij - y_mean_i) * scale * r_inv_sqrt_i
1056:      Iterate column-wise (j) then row-wise (i) for optimal cache access with column-major storage */
1057:   for (j = 0; j < m; j++) {
1058:     const PetscScalar *z_col = z_array + j * lda_z;
1059:     PetscScalar       *s_col = s_array + j * lda_s;

1061:     for (i = 0; i < obs_size_local; i++) s_col[i] = (z_col[i] - y_array[i]) * scale * r_array[i];
1062:   }

1064:   /* Restore arrays */
1065:   PetscCall(VecRestoreArrayRead(r_inv_sqrt, &r_array));
1066:   PetscCall(VecRestoreArrayRead(y_mean, &y_array));
1067:   PetscCall(MatDenseRestoreArrayWrite(S, &s_array));
1068:   PetscCall(MatDenseRestoreArrayRead(Z, &z_array));
1069:   PetscFunctionReturn(PETSC_SUCCESS);
1070: }

1072: PETSC_INTERN PetscErrorCode PetscDAEnsembleForecast_Ensemble(PetscDA da, PetscDAEnsembleForecastFn *model, PetscCtx ctx)
1073: {
1074:   PetscDA_Ensemble *en = (PetscDA_Ensemble *)da->data;

1076:   PetscFunctionBegin;
1078:   PetscCall((*model)(en->ensemble, ctx));
1079:   PetscFunctionReturn(PETSC_SUCCESS);
1080: }