Actual source code: ex3.c

  1: static char help[] = "Shallow water test cases with data assimilation.\n"
  2:                      "Implements 1D shallow water equations with 2 DOF per grid point (h, hu).\n\n"
  3:                      "Example usage:\n"
  4:                      "  ./ex3 -steps 100 -obs_freq 5 -obs_error 0.1 -petscda_ensemble_size 30\n"
  5:                      "  ./ex3 -ex3_test wave -steps 500\n\n";

  7: /* Data assimilation framework header (provides PetscDA) */
  8: #include <petscda.h>
  9: /* PETSc DMDA header (provides DM, DMDA functionality) */
 10: #include <petscdmda.h>
 11: #include <petscdmplex.h>
 12: #include <petscts.h>
 13: #include <petscvec.h>

 15: /* Default parameter values */
 16: #define DEFAULT_N             80 /* 80 grid points */
 17: #define DEFAULT_STEPS         100
 18: #define DEFAULT_OBS_FREQ      5
 19: #define DEFAULT_RANDOM_SEED   12345
 20: #define DEFAULT_G             9.81
 21: #define DEFAULT_DT            0.02
 22: #define DEFAULT_OBS_ERROR_STD 0.01
 23: #define DEFAULT_ENSEMBLE_SIZE 30
 24: #define SPINUP_STEPS          0 /* No spinup needed - wave test has smooth analytical initial condition */

 26: /* Minimum valid parameter values */
 27: #define MIN_N                 1
 28: #define MIN_ENSEMBLE_SIZE     2
 29: #define MIN_OBS_FREQ          1
 30: #define DEFAULT_PROGRESS_FREQ 10 /* Print progress every N steps by default */

 32: /* Test case types */
 33: typedef enum {
 34:   EX3_TEST_DAM,
 35:   EX3_TEST_WAVE
 36: } Ex3TestType;

 38: static PetscFunctionList Ex3TestList               = NULL;
 39: static PetscBool         Ex3TestPackageInitialized = PETSC_FALSE;

 41: typedef enum {
 42:   EX3_FLUX_RUSANOV,
 43:   EX3_FLUX_MC
 44: } Ex3FluxType;

 46: static const char *const Ex3FluxTypes[] = {"rusanov", "mc", "Ex3FluxType", "EX3_FLUX_", NULL};

 48: typedef struct {
 49:   DM          da;        /* 1D periodic DM storing the shallow water state */
 50:   PetscInt    n_vert;    /* State dimension (number of grid points) */
 51:   PetscReal   L;         /* Domain length */
 52:   PetscReal   g;         /* Gravitational constant */
 53:   PetscReal   dx;        /* Grid spacing */
 54:   PetscReal   dt;        /* Integration time step size */
 55:   TS          ts;        /* Reusable time stepper for efficiency */
 56:   Ex3TestType test_type; /* Test case type */
 57:   Ex3FluxType flux_type; /* Flux scheme */
 58: } ShallowWaterCtx;

 60: /*
 61:   Limit - MC (Monotonized Central) limiter
 62: */
 63: static PetscReal Limit(PetscReal a, PetscReal b)
 64: {
 65:   PetscReal c = 0.5 * (a + b);
 66:   if (a * b <= 0.0) return 0.0;
 67:   if (c > 0) return PetscMin(2.0 * a, PetscMin(2.0 * b, c));
 68:   else return PetscMax(2.0 * a, PetscMax(2.0 * b, c));
 69: }

 71: /*
 72:   ComputeFlux - Compute physical flux and wave speed for shallow water
 73: */
 74: static void ComputeFlux(PetscReal g, PetscReal h, PetscReal hu, PetscReal *F_h, PetscReal *F_hu, PetscReal *u, PetscReal *c)
 75: {
 76:   if (h > 1e-10) {
 77:     *u    = hu / h;
 78:     *c    = PetscSqrtReal(g * h);
 79:     *F_h  = hu;
 80:     *F_hu = hu * *u + 0.5 * g * h * h;
 81:   } else {
 82:     *u    = 0.0;
 83:     *c    = 0.0;
 84:     *F_h  = 0.0;
 85:     *F_hu = 0.0;
 86:   }
 87: }

 89: /*
 90:   ShallowWaterRHS - Compute the right-hand side of the shallow water equations

 92:   Dispatches to appropriate flux scheme implementation.
 93: */
 94: static PetscErrorCode ShallowWaterRHS(TS ts, PetscReal t, Vec X, Vec F_vec, PetscCtx ctx)
 95: {
 96:   ShallowWaterCtx   *sw = (ShallowWaterCtx *)ctx;
 97:   Vec                X_local;
 98:   const PetscScalar *x;
 99:   PetscScalar       *f;
100:   PetscInt           xs, xm, i;
101:   const PetscInt     ndof = 2; /* h and hu */

103:   PetscFunctionBeginUser;
104:   PetscCall(DMDAGetCorners(sw->da, &xs, NULL, NULL, &xm, NULL, NULL));
105:   PetscCall(DMGetLocalVector(sw->da, &X_local));
106:   PetscCall(DMGlobalToLocalBegin(sw->da, X, INSERT_VALUES, X_local));
107:   PetscCall(DMGlobalToLocalEnd(sw->da, X, INSERT_VALUES, X_local));
108:   PetscCall(DMDAVecGetArrayRead(sw->da, X_local, &x));
109:   PetscCall(DMDAVecGetArray(sw->da, F_vec, &f));

111:   if (sw->flux_type == EX3_FLUX_RUSANOV) {
112:     /* First-order Rusanov (Local Lax-Friedrichs) scheme */
113:     for (i = xs; i < xs + xm; i++) {
114:       PetscReal h  = PetscRealPart(x[i * ndof]);
115:       PetscReal hu = PetscRealPart(x[i * ndof + 1]);

117:       PetscReal h_im1  = PetscRealPart(x[(i - 1) * ndof]);
118:       PetscReal hu_im1 = PetscRealPart(x[(i - 1) * ndof + 1]);

120:       PetscReal h_ip1  = PetscRealPart(x[(i + 1) * ndof]);
121:       PetscReal hu_ip1 = PetscRealPart(x[(i + 1) * ndof + 1]);

123:       PetscReal F_h_i, F_hu_i, u, c;
124:       PetscReal F_h_im1, F_hu_im1, u_im1, c_im1;
125:       PetscReal F_h_ip1, F_hu_ip1, u_ip1, c_ip1;

127:       ComputeFlux(sw->g, h, hu, &F_h_i, &F_hu_i, &u, &c);
128:       ComputeFlux(sw->g, h_im1, hu_im1, &F_h_im1, &F_hu_im1, &u_im1, &c_im1);
129:       ComputeFlux(sw->g, h_ip1, hu_ip1, &F_h_ip1, &F_hu_ip1, &u_ip1, &c_ip1);

131:       PetscReal alpha_left  = PetscMax(PetscAbsReal(u_im1) + c_im1, PetscAbsReal(u) + c);
132:       PetscReal alpha_right = PetscMax(PetscAbsReal(u) + c, PetscAbsReal(u_ip1) + c_ip1);

134:       PetscReal flux_h_left  = 0.5 * (F_h_im1 + F_h_i - alpha_left * (h - h_im1));
135:       PetscReal flux_hu_left = 0.5 * (F_hu_im1 + F_hu_i - alpha_left * (hu - hu_im1));

137:       PetscReal flux_h_right  = 0.5 * (F_h_i + F_h_ip1 - alpha_right * (h_ip1 - h));
138:       PetscReal flux_hu_right = 0.5 * (F_hu_i + F_hu_ip1 - alpha_right * (hu_ip1 - hu));

140:       f[i * ndof]     = -(flux_h_right - flux_h_left) / sw->dx;
141:       f[i * ndof + 1] = -(flux_hu_right - flux_hu_left) / sw->dx;
142:     }
143:   } else {
144:     /* Second-order MC (Monotonized Central) scheme */
145:     for (i = xs; i < xs + xm; i++) {
146:       /* Read state */
147:       PetscReal h_im2 = PetscRealPart(x[(i - 2) * ndof]);
148:       PetscReal h_im1 = PetscRealPart(x[(i - 1) * ndof]);
149:       PetscReal h_i   = PetscRealPart(x[i * ndof]);
150:       PetscReal h_ip1 = PetscRealPart(x[(i + 1) * ndof]);
151:       PetscReal h_ip2 = PetscRealPart(x[(i + 2) * ndof]);

153:       PetscReal hu_im2 = PetscRealPart(x[(i - 2) * ndof + 1]);
154:       PetscReal hu_im1 = PetscRealPart(x[(i - 1) * ndof + 1]);
155:       PetscReal hu_i   = PetscRealPart(x[i * ndof + 1]);
156:       PetscReal hu_ip1 = PetscRealPart(x[(i + 1) * ndof + 1]);
157:       PetscReal hu_ip2 = PetscRealPart(x[(i + 2) * ndof + 1]);

159:       /* Compute slopes (MC limiter) */
160:       PetscReal s_h_im1 = Limit(h_im1 - h_im2, h_i - h_im1);
161:       PetscReal s_h_i   = Limit(h_i - h_im1, h_ip1 - h_i);
162:       PetscReal s_h_ip1 = Limit(h_ip1 - h_i, h_ip2 - h_ip1);

164:       PetscReal s_hu_im1 = Limit(hu_im1 - hu_im2, hu_i - hu_im1);
165:       PetscReal s_hu_i   = Limit(hu_i - hu_im1, hu_ip1 - hu_i);
166:       PetscReal s_hu_ip1 = Limit(hu_ip1 - hu_i, hu_ip2 - hu_ip1);

168:       /* Reconstruct states at interfaces */
169:       /* Left interface (i-1/2) */
170:       PetscReal h_L_left  = h_im1 + 0.5 * s_h_im1;
171:       PetscReal hu_L_left = hu_im1 + 0.5 * s_hu_im1;
172:       PetscReal h_R_left  = h_i - 0.5 * s_h_i;
173:       PetscReal hu_R_left = hu_i - 0.5 * s_hu_i;

175:       /* Right interface (i+1/2) */
176:       PetscReal h_L_right  = h_i + 0.5 * s_h_i;
177:       PetscReal hu_L_right = hu_i + 0.5 * s_hu_i;
178:       PetscReal h_R_right  = h_ip1 - 0.5 * s_h_ip1;
179:       PetscReal hu_R_right = hu_ip1 - 0.5 * s_hu_ip1;

181:       /* Compute fluxes */
182:       PetscReal F_h_LL, F_hu_LL, u_LL, c_LL;
183:       PetscReal F_h_RL, F_hu_RL, u_RL, c_RL;
184:       PetscReal F_h_LR, F_hu_LR, u_LR, c_LR;
185:       PetscReal F_h_RR, F_hu_RR, u_RR, c_RR;

187:       ComputeFlux(sw->g, h_L_left, hu_L_left, &F_h_LL, &F_hu_LL, &u_LL, &c_LL);
188:       ComputeFlux(sw->g, h_R_left, hu_R_left, &F_h_RL, &F_hu_RL, &u_RL, &c_RL);
189:       ComputeFlux(sw->g, h_L_right, hu_L_right, &F_h_LR, &F_hu_LR, &u_LR, &c_LR);
190:       ComputeFlux(sw->g, h_R_right, hu_R_right, &F_h_RR, &F_hu_RR, &u_RR, &c_RR);

192:       /* Rusanov flux */
193:       PetscReal speed_left   = PetscMax(PetscAbsReal(u_LL) + c_LL, PetscAbsReal(u_RL) + c_RL);
194:       PetscReal flux_h_left  = 0.5 * (F_h_LL + F_h_RL - speed_left * (h_R_left - h_L_left));
195:       PetscReal flux_hu_left = 0.5 * (F_hu_LL + F_hu_RL - speed_left * (hu_R_left - hu_L_left));

197:       PetscReal speed_right   = PetscMax(PetscAbsReal(u_LR) + c_LR, PetscAbsReal(u_RR) + c_RR);
198:       PetscReal flux_h_right  = 0.5 * (F_h_LR + F_h_RR - speed_right * (h_R_right - h_L_right));
199:       PetscReal flux_hu_right = 0.5 * (F_hu_LR + F_hu_RR - speed_right * (hu_R_right - hu_L_right));

201:       /* Update RHS using finite volume method */
202:       f[i * ndof]     = -(flux_h_right - flux_h_left) / sw->dx;
203:       f[i * ndof + 1] = -(flux_hu_right - flux_hu_left) / sw->dx;
204:     }
205:   }

207:   PetscCall(DMDAVecRestoreArrayRead(sw->da, X_local, &x));
208:   PetscCall(DMDAVecRestoreArray(sw->da, F_vec, &f));
209:   PetscCall(DMRestoreLocalVector(sw->da, &X_local));
210:   PetscFunctionReturn(PETSC_SUCCESS);
211: }

213: /*
214:   ShallowWaterContextCreate - Create and initialize a shallow water context with reusable TS object
215: */
216: static PetscErrorCode ShallowWaterContextCreate(DM da, PetscInt n_vert, PetscReal L, PetscReal g, PetscReal dt, Ex3TestType test_type, Ex3FluxType flux_type, ShallowWaterCtx **ctx)
217: {
218:   ShallowWaterCtx *sw;

220:   PetscFunctionBeginUser;
221:   PetscCall(PetscNew(&sw));
222:   sw->da        = da;
223:   sw->n_vert    = n_vert;
224:   sw->L         = L;
225:   sw->g         = g;
226:   sw->dx        = L / n_vert; /* Domain is [0, L] */
227:   sw->dt        = dt;
228:   sw->test_type = test_type;
229:   sw->flux_type = flux_type;

231:   PetscCall(TSCreate(PetscObjectComm((PetscObject)da), &sw->ts));
232:   PetscCall(TSSetProblemType(sw->ts, TS_NONLINEAR));
233:   PetscCall(TSSetRHSFunction(sw->ts, NULL, ShallowWaterRHS, sw));
234:   PetscCall(TSSetType(sw->ts, TSRK));
235:   PetscCall(TSRKSetType(sw->ts, TSRK4));
236:   PetscCall(TSSetTimeStep(sw->ts, dt));
237:   PetscCall(TSSetMaxSteps(sw->ts, 1));
238:   PetscCall(TSSetMaxTime(sw->ts, dt));
239:   PetscCall(TSSetExactFinalTime(sw->ts, TS_EXACTFINALTIME_MATCHSTEP));
240:   PetscCall(TSSetFromOptions(sw->ts));
241:   /* Note: TSSetUp() will be called automatically by TSSolve() when needed */

243:   *ctx = sw;
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }

247: /*
248:   ShallowWaterContextDestroy - Destroy a shallow water context and its TS object
249: */
250: static PetscErrorCode ShallowWaterContextDestroy(ShallowWaterCtx **ctx)
251: {
252:   PetscFunctionBeginUser;
253:   if (!ctx || !*ctx) PetscFunctionReturn(PETSC_SUCCESS);
254:   PetscCall(TSDestroy(&(*ctx)->ts));
255:   PetscCall(PetscFree(*ctx));
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: /* Advance a single state vector one TS step. Used by the truth trajectory and as the per-column kernel of ShallowWaterStep(). */
260: static PetscErrorCode ShallowWaterStepVec(ShallowWaterCtx *sw, Vec x)
261: {
262:   PetscFunctionBeginUser;
263:   /* Reset the TS time for each integration (required for proper RK4 stepping) */
264:   PetscCall(TSSetTime(sw->ts, 0.0));
265:   PetscCall(TSSetStepNumber(sw->ts, 0));
266:   PetscCall(TSSetMaxTime(sw->ts, sw->dt));
267:   PetscCall(TSSolve(sw->ts, x));
268:   PetscFunctionReturn(PETSC_SUCCESS);
269: }

271: /*
272:   ShallowWaterStep - Advance every column of the ensemble matrix one time step using shallow water dynamics.
273:   TS only advances one state at a time, so loop over columns here.
274: */
275: static PetscErrorCode ShallowWaterStep(Mat ensemble, PetscCtx ctx)
276: {
277:   ShallowWaterCtx *sw = (ShallowWaterCtx *)ctx;
278:   PetscInt         n;

280:   PetscFunctionBeginUser;
281:   PetscCall(MatGetSize(ensemble, NULL, &n));
282:   /* Collective: dense ensemble Mat is row-distributed, so every rank visits every global column j and
283:      MatDenseGetColumnVec returns the parallel column-Vec that all ranks step together. */
284:   for (PetscInt j = 0; j < n; j++) {
285:     Vec col;

287:     PetscCall(MatDenseGetColumnVec(ensemble, j, &col));
288:     PetscCall(ShallowWaterStepVec(sw, col));
289:     PetscCall(MatDenseRestoreColumnVec(ensemble, j, &col));
290:   }
291:   PetscFunctionReturn(PETSC_SUCCESS);
292: }

294: /*
295:   ShallowWaterSolution_Dam - Smooth periodic "dam-like" initial condition

297:   Creates a smooth Gaussian bump compatible with periodic boundaries.
298:   This avoids boundary artifacts while maintaining dam-like evolution.
299: */
300: static PetscErrorCode ShallowWaterSolution_Dam(PetscReal L, PetscReal x, PetscReal *h, PetscReal *hu)
301: {
302:   const PetscReal h_mean = 1.5;      /* Mean water height */
303:   const PetscReal h_amp  = 0.4;      /* Bump amplitude */
304:   const PetscReal x_c    = 0.25 * L; /* Bump center */
305:   const PetscReal sigma  = 0.1 * L;  /* Gaussian width */

307:   PetscFunctionBeginUser;
308:   /* Smooth Gaussian bump: h = h_mean + h_amp * exp(-(x-x_c)^2/(2*sigma^2)) */
309:   PetscReal dx = x - x_c;
310:   /* Handle periodicity: use minimum distance on periodic domain */
311:   if (dx > 0.5 * L) dx -= L;
312:   if (dx < -0.5 * L) dx += L;

314:   *h = h_mean + h_amp * PetscExpReal(-dx * dx / (2.0 * sigma * sigma));
315:   /* Initially at rest */
316:   *hu = 0.0;
317:   PetscFunctionReturn(PETSC_SUCCESS);
318: }

320: /*
321:   ShallowWaterSolution_Wave - Traveling wave initial condition

323:   Sets smooth traveling wave with sinusoidal perturbation.
324:   For shallow water, a rightward-traveling wave requires velocity perturbation
325:   coupled to height: u' = c * (h'/h_mean) where c = sqrt(g*h_mean).
326: */
327: static PetscErrorCode ShallowWaterSolution_Wave(PetscReal L, PetscReal x, PetscReal *h, PetscReal *hu)
328: {
329:   const PetscReal h_mean = 1.5;                       /* Mean water height */
330:   const PetscReal h_amp  = 0.3;                       /* Wave amplitude */
331:   const PetscReal g      = DEFAULT_G;                 /* Gravitational constant */
332:   const PetscReal k      = 2.0 * PETSC_PI / L;        /* Wave number (one wavelength over domain) */
333:   const PetscReal c      = PetscSqrtReal(g * h_mean); /* Wave speed */

335:   PetscFunctionBeginUser;
336:   /* Height field: h = h_mean + h_amp * sin(k*x) */
337:   PetscReal h_pert = h_amp * PetscSinReal(k * x);
338:   *h               = h_mean + h_pert;

340:   /* Velocity for rightward-traveling wave: u = c * (h'/h_mean)
341:      Using linearized shallow water: u ~= (c/h_mean) * h_pert */
342:   PetscReal u = (c / h_mean) * h_pert;
343:   *hu         = (*h) * u;
344:   PetscFunctionReturn(PETSC_SUCCESS);
345: }

347: /*
348:   ShallowWaterSolution - Dispatch to appropriate initial condition based on test type
349: */
350: static PetscErrorCode ShallowWaterSolution(Ex3TestType test_type, PetscReal L, PetscReal x, PetscReal *h, PetscReal *hu)
351: {
352:   PetscFunctionBeginUser;
353:   switch (test_type) {
354:   case EX3_TEST_DAM:
355:     PetscCall(ShallowWaterSolution_Dam(L, x, h, hu));
356:     break;
357:   case EX3_TEST_WAVE:
358:     PetscCall(ShallowWaterSolution_Wave(L, x, h, hu));
359:     break;
360:   default:
361:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Unknown test type");
362:   }
363:   PetscFunctionReturn(PETSC_SUCCESS);
364: }

366: /*
367:   CreateObservationMatrix - Create observation matrix H for shallow water, and H1 as scalar version

369:   Observes water height (h) at every obs_stride-th grid point.
370:   This creates a sparse matrix mapping from full state (n_vert*ndof) to observations.
371:   For n_vert=80 and obs_stride=2, we observe at points 0, 2, 4, ..., 78.
372: */
373: static PetscErrorCode CreateObservationMatrix(PetscInt n_vert, PetscInt ndof, PetscInt nobs, PetscInt obs_stride, Vec state, Mat *H, Mat *H1)
374: {
375:   PetscInt i, local_state_size;

377:   PetscFunctionBeginUser;
378:   PetscCheck(n_vert == obs_stride * nobs, PETSC_COMM_WORLD, PETSC_ERR_ARG_INCOMP, "Number of grid points (%" PetscInt_FMT ") must equal obs_stride*nobs (%" PetscInt_FMT "*%" PetscInt_FMT ")", n_vert, obs_stride, nobs);

380:   PetscCall(VecGetLocalSize(state, &local_state_size));

382:   /* Create observation matrix H (nobs x n_vert*ndof) */
383:   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, local_state_size, nobs, n_vert * ndof, 1, NULL, 0, NULL, H));
384:   PetscCall(MatSetFromOptions(*H));

386:   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, local_state_size / ndof, nobs, n_vert, 1, NULL, 0, NULL, H1));
387:   PetscCall(MatSetFromOptions(*H1));

389:   /* Observe water height (h) at every obs_stride-th grid point */
390:   for (i = 0; i < nobs; i++) {
391:     PetscInt grid_point = obs_stride * i;
392:     PetscCall(MatSetValue(*H1, i, grid_point, 1.0, INSERT_VALUES));
393:     /* pick out the h component (first DOF) at that grid point */
394:     PetscCall(MatSetValue(*H, i, grid_point * ndof, 1.0, INSERT_VALUES));
395:   }

397:   PetscCall(MatAssemblyBegin(*H, MAT_FINAL_ASSEMBLY));
398:   PetscCall(MatAssemblyEnd(*H, MAT_FINAL_ASSEMBLY));
399:   PetscCall(MatAssemblyBegin(*H1, MAT_FINAL_ASSEMBLY));
400:   PetscCall(MatAssemblyEnd(*H1, MAT_FINAL_ASSEMBLY));

402:   PetscCall(MatViewFromOptions(*H1, NULL, "-H_view"));
403:   PetscFunctionReturn(PETSC_SUCCESS);
404: }

406: /*
407:   ValidateParameters - Validate input parameters and apply constraints
408: */
409: static PetscErrorCode ValidateParameters(PetscInt *n, PetscInt *nobs, PetscInt *steps, PetscInt *obs_freq, PetscInt *ensemble_size, PetscReal *dt, PetscReal *g, PetscReal *obs_error_std)
410: {
411:   PetscFunctionBeginUser;
412:   PetscCheck(*n > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "State dimension n must be positive, got %" PetscInt_FMT, *n);
413:   PetscCheck(*steps >= 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Number of steps must be non-negative, got %" PetscInt_FMT, *steps);
414:   PetscCheck(*ensemble_size >= MIN_ENSEMBLE_SIZE, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be at least %d for meaningful statistics, got %" PetscInt_FMT, MIN_ENSEMBLE_SIZE, *ensemble_size);

416:   if (*obs_freq < MIN_OBS_FREQ) {
417:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Observation frequency adjusted from %" PetscInt_FMT " to %d\n", *obs_freq, MIN_OBS_FREQ));
418:     *obs_freq = MIN_OBS_FREQ;
419:   }
420:   if (*obs_freq > *steps && *steps > 0) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Observation frequency (%" PetscInt_FMT ") > total steps (%" PetscInt_FMT "), no observations will be assimilated.\n", *obs_freq, *steps));

422:   PetscCheck(*dt > 0.0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Time step dt must be positive, got %g", (double)*dt);
423:   PetscCheck(*obs_error_std > 0.0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Observation error std must be positive, got %g", (double)*obs_error_std);
424:   PetscCheck(PetscIsNormalReal(*g), PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Gravitational constant g must be a normal real number");
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }

428: /*
429:   ComputeRMSE - Compute root mean square error between two vectors
430: */
431: static PetscErrorCode ComputeRMSE(Vec v1, Vec v2, Vec work, PetscInt n, PetscReal *rmse)
432: {
433:   PetscReal norm;

435:   PetscFunctionBeginUser;
436:   PetscCall(VecWAXPY(work, -1.0, v2, v1));
437:   PetscCall(VecNorm(work, NORM_2, &norm));
438:   *rmse = norm / PetscSqrtReal((PetscReal)n);
439:   PetscFunctionReturn(PETSC_SUCCESS);
440: }

442: /* Forward declaration */
443: static PetscErrorCode Ex3TestFinalizePackage(void);

445: /* Test type setters */
446: static PetscErrorCode Ex3SetTest_Dam(Ex3TestType *test_type)
447: {
448:   PetscFunctionBeginUser;
449:   *test_type = EX3_TEST_DAM;
450:   PetscFunctionReturn(PETSC_SUCCESS);
451: }

453: static PetscErrorCode Ex3SetTest_Wave(Ex3TestType *test_type)
454: {
455:   PetscFunctionBeginUser;
456:   *test_type = EX3_TEST_WAVE;
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: /* Package initialization */
461: static PetscErrorCode Ex3TestInitializePackage(void)
462: {
463:   PetscFunctionBeginUser;
464:   if (Ex3TestPackageInitialized) PetscFunctionReturn(PETSC_SUCCESS);
465:   Ex3TestPackageInitialized = PETSC_TRUE;
466:   PetscCall(PetscFunctionListAdd(&Ex3TestList, "dam", Ex3SetTest_Dam));
467:   PetscCall(PetscFunctionListAdd(&Ex3TestList, "wave", Ex3SetTest_Wave));
468:   PetscCall(PetscRegisterFinalize(Ex3TestFinalizePackage));
469:   PetscFunctionReturn(PETSC_SUCCESS);
470: }

472: static PetscErrorCode Ex3TestFinalizePackage(void)
473: {
474:   PetscFunctionBeginUser;
475:   Ex3TestPackageInitialized = PETSC_FALSE;
476:   PetscCall(PetscFunctionListDestroy(&Ex3TestList));
477:   PetscFunctionReturn(PETSC_SUCCESS);
478: }

480: int main(int argc, char **argv)
481: {
482:   ShallowWaterCtx *sw_ctx = NULL;
483:   DM               da_state, cda;
484:   PetscDA          da;
485:   Vec              x0, x_mean, x_forecast;
486:   Vec              truth_state, rmse_work;
487:   Vec              observation, obs_noise, obs_error_var;
488:   Vec              xyz[3] = {NULL, NULL, NULL};
489:   Vec              coord;
490:   Mat              H = NULL, H1 = NULL; /* Observation operator matrix (h at every other grid point) and scalar version */
491:   PetscRandom      rng;
492:   PetscScalar     *x_coord;
493:   Ex3TestType      test_type      = EX3_TEST_DAM;     /* Default to dam-break */
494:   Ex3FluxType      flux_type      = EX3_FLUX_RUSANOV; /* Default to first-order Rusanov */
495:   PetscBool        output_enabled = PETSC_FALSE;
496:   PetscBool        radius_set;
497:   const char      *da_prefix;
498:   FILE            *fp = NULL;
499:   char             output_file[PETSC_MAX_PATH_LEN];
500:   const PetscInt   ndof   = 2; /* Degrees of freedom per grid point: h and hu */
501:   PetscInt         n_vert = DEFAULT_N, steps = DEFAULT_STEPS, obs_freq = DEFAULT_OBS_FREQ;
502:   PetscInt         random_seed = DEFAULT_RANDOM_SEED, ensemble_size = DEFAULT_ENSEMBLE_SIZE;
503:   PetscInt         n_spin = SPINUP_STEPS, progress_freq = DEFAULT_PROGRESS_FREQ;
504:   PetscInt         n_stat_steps = 0, obs_count = 0, step;
505:   PetscInt         obs_stride = 2, nobs; /* LETKF samples nobs = n_vert/obs_stride observations, one every obs_stride-th grid point */
506:   PetscInt         xs, xm, i;
507:   PetscReal        g = DEFAULT_G, dt = DEFAULT_DT, obs_error_std = DEFAULT_OBS_ERROR_STD;
508:   PetscReal        localization_radius;                  /* Default 2*L: effectively no localization with Gaspari-Cohn (max periodic distance is L/2) */
509:   PetscReal        L             = (PetscReal)DEFAULT_N; /* Domain length */
510:   PetscReal        rmse_forecast = 0.0, rmse_analysis = 0.0;
511:   PetscReal        sum_rmse_forecast = 0.0, sum_rmse_analysis = 0.0;
512:   PetscReal        bd[3] = {0, 0, 0};

514:   PetscFunctionBeginUser;
515:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
516:   /* Kokkos initialization deferred to Phase 5 optimization */

518:   /* Initialize test type package */
519:   PetscCall(Ex3TestInitializePackage());

521:   /* Parse command-line options */
522:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Shallow Water [L]ETKF Example", NULL);
523:   PetscCall(PetscOptionsInt("-n", "Number of grid points", "", n_vert, &n_vert, NULL));
524:   PetscCall(PetscOptionsInt("-steps", "Number of time steps", "", steps, &steps, NULL));
525:   PetscCall(PetscOptionsInt("-obs_freq", "Observation frequency", "", obs_freq, &obs_freq, NULL));
526:   PetscCall(PetscOptionsInt("-obs_stride", "Observation stride (sample 1 of every N grid points)", "", obs_stride, &obs_stride, NULL));
527:   PetscCall(PetscOptionsReal("-g", "Gravitational constant", "", g, &g, NULL));
528:   PetscCall(PetscOptionsReal("-dt", "Time step size", "", dt, &dt, NULL));
529:   PetscCall(PetscOptionsReal("-obs_error", "Observation error standard deviation", "", obs_error_std, &obs_error_std, NULL));
530:   PetscCall(PetscOptionsReal("-L", "Domain length", "", L, &L, NULL));
531:   PetscCall(PetscOptionsInt("-random_seed", "Random seed for ensemble perturbations", "", random_seed, &random_seed, NULL));
532:   PetscCall(PetscOptionsInt("-progress_freq", "Print progress every N steps (0 = only first/last)", "", progress_freq, &progress_freq, NULL));
533:   PetscCall(PetscOptionsString("-output_file", "Output file for visualization data", "", "", output_file, sizeof(output_file), &output_enabled));
534:   /* Parse test type option */
535:   {
536:     char        testTypeName[256];
537:     const char *defaultType                 = "dam";
538:     PetscBool   set                         = PETSC_FALSE;
539:     PetscErrorCode (*setter)(Ex3TestType *) = NULL;

541:     PetscCall(PetscStrncpy(testTypeName, defaultType, sizeof(testTypeName)));
542:     PetscCall(PetscOptionsFList("-ex3_test", "Test case type", "Ex3SetTest", Ex3TestList, defaultType, testTypeName, sizeof(testTypeName), &set));
543:     if (set) {
544:       PetscCall(PetscFunctionListFind(Ex3TestList, testTypeName, &setter));
545:       PetscCheck(setter, PETSC_COMM_WORLD, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown test type \"%s\"", testTypeName);
546:       PetscCall((*setter)(&test_type));
547:     }
548:   }

550:   /* Parse flux type option */
551:   PetscCall(PetscOptionsEnum("-ex3_flux", "Flux scheme (rusanov/mc)", "", Ex3FluxTypes, (PetscEnum)flux_type, (PetscEnum *)&flux_type, NULL));
552:   PetscOptionsEnd();
553:   localization_radius = 2.0 * L;
554:   n_spin              = 0; /* No spinup needed for either test - dam evolves naturally, wave is already smooth */
555:   PetscCheck(obs_stride > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "obs_stride must be positive (got %" PetscInt_FMT ")", obs_stride);
556:   nobs = n_vert / obs_stride;

558:   /* Validate and constrain parameters */
559:   PetscCall(ValidateParameters(&n_vert, &nobs, &steps, &obs_freq, &ensemble_size, &dt, &g, &obs_error_std));

561:   /* Validate progress frequency */
562:   if (progress_freq < 0) {
563:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Progress frequency adjusted from %" PetscInt_FMT " to 0 (only first/last)\n", progress_freq));
564:     progress_freq = 0;
565:   }

567:   /* Create 1D periodic DM for state space with ndof=2 */
568:   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_PERIODIC, n_vert, ndof, 2, NULL, &da_state));
569:   PetscCall(DMSetFromOptions(da_state));
570:   PetscCall(DMSetUp(da_state));

572:   /* Create shallow water context with reusable TS object */
573:   PetscCall(ShallowWaterContextCreate(da_state, n_vert, L, g, dt, test_type, flux_type, &sw_ctx));

575:   /* Initialize random number generator */
576:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rng));
577:   {
578:     PetscMPIInt rank;
579:     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
580:     PetscCall(PetscRandomSetSeed(rng, (unsigned long)(random_seed + rank)));
581:   }
582:   PetscCall(PetscRandomSetFromOptions(rng));
583:   PetscCall(PetscRandomSeed(rng));

585:   /* Initialize state vectors */
586:   PetscCall(DMCreateGlobalVector(da_state, &x0));

588:   /* Set initial condition based on test type */
589:   {
590:     PetscScalar *x_array;
591:     PetscInt     xs, xm, i;
592:     PetscCall(DMDAGetCorners(da_state, &xs, NULL, NULL, &xm, NULL, NULL));
593:     PetscCall(DMDAVecGetArray(da_state, x0, &x_array));
594:     for (i = xs; i < xs + xm; i++) {
595:       PetscReal x = ((PetscReal)i + 0.5) * L / n_vert;
596:       PetscReal h, hu;
597:       PetscCall(ShallowWaterSolution(test_type, L, x, &h, &hu));
598:       x_array[i * ndof]     = h;
599:       x_array[i * ndof + 1] = hu;
600:     }
601:     PetscCall(DMDAVecRestoreArray(da_state, x0, &x_array));
602:   }

604:   /* Initialize truth trajectory */
605:   PetscCall(VecDuplicate(x0, &truth_state));
606:   PetscCall(VecCopy(x0, truth_state));
607:   PetscCall(VecDuplicate(x0, &rmse_work));

609:   /* Spinup if needed (not used by default - both tests start from their analytical initial conditions) */
610:   if (n_spin > 0) {
611:     PetscInt spinup_progress_interval = (n_spin >= 10) ? (n_spin / 10) : 1;
612:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Spinning up truth trajectory for %" PetscInt_FMT " steps...\n", n_spin));

614:     for (PetscInt k = 0; k < n_spin; k++) {
615:       PetscCall(ShallowWaterStepVec(sw_ctx, truth_state));

617:       /* Progress reporting for long spinups */
618:       if ((k + 1) % spinup_progress_interval == 0 || (k + 1) == n_spin) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Spinup progress: %" PetscInt_FMT "/%" PetscInt_FMT " (%.0f%%)\n", k + 1, n_spin, 100.0 * (k + 1) / n_spin));
619:     }

621:     /* Update x0 to match spun-up state for consistent ensemble initialization */
622:     PetscCall(VecCopy(truth_state, x0));
623:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Spinup complete. Ensemble will be initialized from spun-up state.\n\n"));
624:   }

626:   /* Create observation matrix H, observing h at every obs_stride-th grid point */
627:   PetscCall(CreateObservationMatrix(n_vert, ndof, nobs, obs_stride, x0, &H, &H1));

629:   /* Initialize observation vectors using MatCreateVecs from H (same as H1) */
630:   PetscCall(MatCreateVecs(H, NULL, &observation));
631:   PetscCall(VecDuplicate(observation, &obs_noise));
632:   PetscCall(VecDuplicate(observation, &obs_error_var));
633:   PetscCall(VecSet(obs_error_var, obs_error_std * obs_error_std));

635:   /* Create and configure PetscDA for ensemble data assimilation */
636:   PetscCall(PetscDACreate(PETSC_COMM_WORLD, &da));
637:   PetscCall(PetscDASetSizes(da, n_vert * ndof, nobs));  /* State size includes ndof */
638:   PetscCall(PetscDAEnsembleSetSize(da, ensemble_size)); /* State size includes ndof */
639:   {
640:     PetscInt local_state_size, local_obs_size;
641:     PetscCall(VecGetLocalSize(x0, &local_state_size));
642:     PetscCall(VecGetLocalSize(observation, &local_obs_size));
643:     PetscCall(PetscDASetLocalSizes(da, local_state_size, local_obs_size));
644:   }
645:   PetscCall(PetscDASetNDOF(da, ndof)); /* Set number of degrees of freedom per grid point */
646:   PetscCall(PetscDASetFromOptions(da));
647:   PetscCall(PetscDAEnsembleGetSize(da, &ensemble_size));
648:   PetscCall(PetscDASetUp(da));

650:   /* Initialize ensemble statistics vectors */
651:   PetscCall(VecDuplicate(x0, &x_mean));
652:   PetscCall(VecDuplicate(x0, &x_forecast));

654:   /* Set observation error variance */
655:   PetscCall(PetscDASetObsErrorVariance(da, obs_error_var));

657:   /* Configure localization for LETKF (Q is built lazily on first analysis). PETSCDALETKF is
658:      now the only registered PetscDAType, so we can call the setters unconditionally. */
659:   bd[0] = L;
660:   PetscCall(DMDASetUniformCoordinates(da_state, 0.0, L, 0.0, 0.0, 0.0, 0.0));
661:   PetscCall(DMGetCoordinateDM(da_state, &cda));
662:   PetscCall(DMGetCoordinates(da_state, &coord));
663:   PetscCall(DMDAGetCorners(cda, &xs, NULL, NULL, &xm, NULL, NULL));
664:   PetscCall(DMDAVecGetArray(cda, coord, &x_coord));
665:   for (i = xs; i < xs + xm; i++) x_coord[i] = ((PetscReal)i + 0.5) * L / n_vert;
666:   PetscCall(DMDAVecRestoreArray(cda, coord, &x_coord));

668:   PetscCall(DMCreateGlobalVector(cda, &xyz[0]));
669:   PetscCall(VecSetFromOptions(xyz[0]));
670:   PetscCall(PetscObjectSetName((PetscObject)xyz[0], "x_coordinate"));
671:   PetscCall(VecCopy(coord, xyz[0]));

673:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)da, &da_prefix));
674:   PetscCall(PetscOptionsHasName(NULL, da_prefix, "-petscda_letkf_localization_radius", &radius_set));
675:   if (!radius_set) PetscCall(PetscDALETKFSetLocalizationRadius(da, localization_radius));
676:   PetscCall(PetscDALETKFGetLocalizationRadius(da, &localization_radius));
677:   PetscCall(PetscDALETKFSetLocalizationCoordinates(da, xyz, bd, H1));
678:   PetscCall(VecDestroy(&xyz[0]));

680:   /* Initialize ensemble members with perturbations around spun-up state
681:      This is critical for convergence - ensemble needs spread even after spinup */
682:   PetscCall(PetscDAEnsembleInitialize(da, x0, obs_error_std, rng));

684:   /* Print configuration summary */
685:   {
686:     const char *test_name = (test_type == EX3_TEST_DAM) ? "Dam-break" : "Traveling wave";
687:     const char *flux_name = (flux_type == EX3_FLUX_RUSANOV) ? "Rusanov (1st order)" : "MC (2nd order)";
688:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Shallow Water [L]ETKF Example\n"));
689:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "============================\n"));
690:     PetscCall(PetscPrintf(PETSC_COMM_WORLD,
691:                           "  Test case             : %s\n"
692:                           "  Flux scheme           : %s\n"
693:                           "  State dimension       : %" PetscInt_FMT " (%" PetscInt_FMT " grid points x %d DOF)\n"
694:                           "  Observation dimension : %" PetscInt_FMT "\n"
695:                           "  Ensemble size         : %" PetscInt_FMT "\n"
696:                           "  Domain length (L)     : %.4f\n"
697:                           "  Gravitational const   : %.4f\n"
698:                           "  Time step (dt)        : %.4f\n"
699:                           "  Total steps           : %" PetscInt_FMT "\n"
700:                           "  Observation frequency : %" PetscInt_FMT "\n"
701:                           "  Observation noise std : %.3f\n"
702:                           "  Random seed           : %" PetscInt_FMT "\n"
703:                           "  Localization radius   : %g\n\n",
704:                           test_name, flux_name, n_vert * ndof, n_vert, (int)ndof, nobs, ensemble_size, (double)L, (double)g, (double)dt, steps, obs_freq, (double)obs_error_std, random_seed, (double)localization_radius));
705:   }

707:   /* Open output file if requested */
708:   if (output_enabled) {
709:     PetscCall(PetscFOpen(PETSC_COMM_WORLD, output_file, "w", &fp));
710:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# Shallow Water [L]ETKF Data Assimilation Output\n"));
711:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# Test case: %s\n", (test_type == EX3_TEST_DAM) ? "Dam-break" : "Traveling wave"));
712:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# n_vert=%d, ndof=%d, nobs=%d, ensemble_size=%d\n", (int)n_vert, (int)ndof, (int)nobs, (int)ensemble_size));
713:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# dt=%.6f, g=%.6f, obs_error_std=%.6f\n", (double)dt, (double)g, (double)obs_error_std));
714:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# Format: step time [truth_h truth_hu]x%d [mean_h mean_hu]x%d [obs]x%d\n", (int)n_vert, (int)n_vert, (int)nobs));
715:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Writing output to: %s\n\n", output_file));

717:     /* Write initial condition (step 0) */
718:     const PetscScalar *truth_array, *mean_array;

720:     /* Compute initial ensemble mean */
721:     PetscCall(PetscDAEnsembleComputeMean(da, x_mean));

723:     PetscCall(DMDAVecGetArrayRead(da_state, truth_state, &truth_array));
724:     PetscCall(DMDAVecGetArrayRead(da_state, x_mean, &mean_array));

726:     /* Write step 0 and time 0 */
727:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "0 0.000000"));

729:     /* Write truth state (h, hu for each grid point) */
730:     for (PetscInt i = 0; i < n_vert * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(truth_array[i])));

732:     /* Write ensemble mean (h, hu for each grid point) */
733:     for (PetscInt i = 0; i < n_vert * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(mean_array[i])));

735:     /* Write nan for observations (no observations at step 0) */
736:     for (PetscInt i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));

738:     PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "\n"));

740:     PetscCall(DMDAVecRestoreArrayRead(da_state, truth_state, &truth_array));
741:     PetscCall(DMDAVecRestoreArrayRead(da_state, x_mean, &mean_array));
742:   }

744:   /* Print initial condition (step 0) */
745:   {
746:     PetscReal rmse_initial;
747:     PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
748:     PetscCall(ComputeRMSE(x_mean, truth_state, rmse_work, n_vert * ndof, &rmse_initial));
749:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4d, time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f [initial]\n", 0, 0.0, (double)rmse_initial, (double)rmse_initial));
750:   }

752:   /* Main assimilation cycle: forecast and analysis steps */
753:   for (step = 1; step <= steps; step++) {
754:     PetscReal time = step * dt;

756:     /* Propagate ensemble and truth trajectory from t_{k-1} to t_k */
757:     PetscCall(PetscDAEnsembleForecast(da, ShallowWaterStep, sw_ctx));
758:     PetscCall(ShallowWaterStepVec(sw_ctx, truth_state));

760:     /* Forecast step: compute ensemble mean and forecast RMSE */
761:     PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
762:     PetscCall(VecCopy(x_mean, x_forecast));
763:     PetscCall(ComputeRMSE(x_forecast, truth_state, rmse_work, n_vert * ndof, &rmse_forecast));
764:     rmse_analysis = rmse_forecast;

766:     /* Analysis step: assimilate observations when available */
767:     if (step % obs_freq == 0 && step > 0) {
768:       /* Generate synthetic noisy observations from truth using observation matrix H */
769:       Vec truth_obs, temp_truth;
770:       PetscCall(MatCreateVecs(H, NULL, &truth_obs));
771:       PetscCall(MatCreateVecs(H, &temp_truth, NULL));

773:       /* Apply H to get observations: y = H*x_true
774:          Use temporary vector compatible with H's type to avoid Kokkos vector type issues */
775:       PetscCall(VecCopy(truth_state, temp_truth));
776:       PetscCall(MatMult(H, temp_truth, truth_obs));

778:       /* Add observation noise */
779:       PetscCall(VecSetRandomGaussian(obs_noise, rng, 0.0, obs_error_std));
780:       PetscCall(VecWAXPY(observation, 1.0, obs_noise, truth_obs));

782:       /* Perform LETKF analysis with observation matrix H */
783:       PetscCall(PetscDAEnsembleAnalysis(da, observation, H));

785:       /* Clean up */
786:       PetscCall(VecDestroy(&temp_truth));
787:       PetscCall(VecDestroy(&truth_obs));

789:       /* Compute analysis RMSE */
790:       PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
791:       PetscCall(ComputeRMSE(x_mean, truth_state, rmse_work, n_vert * ndof, &rmse_analysis));
792:       obs_count++;
793:     }

795:     /* Accumulate statistics */
796:     sum_rmse_forecast += rmse_forecast;
797:     sum_rmse_analysis += rmse_analysis;
798:     n_stat_steps++;

800:     /* Write data to output file if enabled */
801:     if (output_enabled && fp) {
802:       const PetscScalar *truth_array, *mean_array, *obs_array;

804:       PetscCall(DMDAVecGetArrayRead(da_state, truth_state, &truth_array));
805:       PetscCall(DMDAVecGetArrayRead(da_state, x_mean, &mean_array));

807:       /* Write step and time */
808:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "%d %.6f", (int)step, (double)time));

810:       /* Write truth state (h, hu for each grid point) */
811:       for (PetscInt i = 0; i < n_vert * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(truth_array[i])));

813:       /* Write ensemble mean (h, hu for each grid point) */
814:       for (PetscInt i = 0; i < n_vert * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(mean_array[i])));

816:       /* Write observations (or nan if no observation at this step) */
817:       if (step % obs_freq == 0 && step > 0) {
818:         PetscCall(VecGetArrayRead(observation, &obs_array));
819:         for (PetscInt i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(obs_array[i])));
820:         PetscCall(VecRestoreArrayRead(observation, &obs_array));
821:       } else {
822:         for (PetscInt i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));
823:       }

825:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "\n"));

827:       PetscCall(DMDAVecRestoreArrayRead(da_state, truth_state, &truth_array));
828:       PetscCall(DMDAVecRestoreArrayRead(da_state, x_mean, &mean_array));
829:     }

831:     /* Progress reporting */
832:     if (progress_freq == 0) {
833:       /* Only print first and last steps */
834:       if (step == 0 || step == steps) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4" PetscInt_FMT ", time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f\n", step, (double)time, (double)rmse_forecast, (double)rmse_analysis));
835:     } else {
836:       /* Print every progress_freq steps, plus first and last */
837:       if ((step % progress_freq == 0) || (step == steps)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4" PetscInt_FMT ", time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f\n", step, (double)time, (double)rmse_forecast, (double)rmse_analysis));
838:     }
839:   }

841:   /* Report final statistics */
842:   if (n_stat_steps > 0) {
843:     PetscReal avg_rmse_forecast = sum_rmse_forecast / n_stat_steps;
844:     PetscReal avg_rmse_analysis = sum_rmse_analysis / n_stat_steps;
845:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nStatistics (%" PetscInt_FMT " steps):\n", n_stat_steps));
846:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "==================================================\n"));
847:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Mean RMSE (forecast) : %.5f\n", (double)avg_rmse_forecast));
848:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Mean RMSE (analysis) : %.5f\n", (double)avg_rmse_analysis));
849:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Observations used    : %" PetscInt_FMT "\n\n", obs_count));
850:   } else {
851:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nWarning: No statistics collected\n\n"));
852:   }

854:   /* Close output file if opened */
855:   if (output_enabled && fp) {
856:     PetscCall(PetscFClose(PETSC_COMM_WORLD, fp));
857:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Output written to: %s\n", output_file));
858:   }

860:   PetscCall(PetscDAView(da, PETSC_VIEWER_STDOUT_WORLD));

862:   /* Cleanup */
863:   PetscCall(MatDestroy(&H));
864:   PetscCall(MatDestroy(&H1));
865:   PetscCall(VecDestroy(&x_forecast));
866:   PetscCall(VecDestroy(&x_mean));
867:   PetscCall(VecDestroy(&obs_error_var));
868:   PetscCall(VecDestroy(&obs_noise));
869:   PetscCall(VecDestroy(&observation));
870:   PetscCall(VecDestroy(&rmse_work));
871:   PetscCall(VecDestroy(&truth_state));
872:   PetscCall(VecDestroy(&x0));
873:   PetscCall(PetscDADestroy(&da));
874:   PetscCall(DMDestroy(&da_state));
875:   PetscCall(ShallowWaterContextDestroy(&sw_ctx));
876:   PetscCall(PetscRandomDestroy(&rng));

878:   PetscCall(PetscFinalize());
879:   return 0;
880: }

882: /*TEST

884:   testset:
885:     requires: kokkos_kernels !complex
886:     diff_args: -j
887:     args: -ex3_test dam -steps 10 -progress_freq 1 -petscda_ensemble_size 10 -obs_freq 2 -obs_error 0.03

889:     test:
890:       suffix: letkf_dam
891:       args: -petscda_type letkf -petscda_ensemble_size 7

893:     test:
894:       suffix: letkf_dam_loc_none
895:       args: -petscda_type letkf -petscda_letkf_localization_type none

897:     test:
898:       nsize: 3
899:       suffix: kokkos_dam
900:       args: -petscda_type letkf -mat_type aijkokkos -vec_type kokkos -petscda_letkf_batch_size 13 -info :vec -petscda_ensemble_size 5 -petscda_letkf_localization_radius 10.0

902:   testset:
903:     requires: kokkos_kernels !complex
904:     diff_args: -j
905:     args: -ex3_test wave -steps 10 -petscda_ensemble_size 10 -petscda_type letkf -obs_freq 2 -obs_error 0.03

907:     test:
908:       suffix: letkf_wave
909:       args: -petscda_type letkf -petscda_ensemble_size 5

911:     test:
912:       nsize: 3
913:       suffix: kokkos_wave
914:       args: -petscda_type letkf -mat_type aijkokkos -vec_type kokkos -petscda_letkf_batch_size 13 -info :vec -petscda_ensemble_size 5 -petscda_letkf_localization_radius 10.0

916:     test:
917:       suffix: letkf_wave_mc
918:       args: -ex3_flux mc -petscda_type letkf -petscda_letkf_localization_type none

920: TEST*/