Actual source code: ex4.c

  1: static char help[] = "2D Shallow water equations with traveling wave verification and LETKF data assimilation.\n"
  2:                      "Implements 2D shallow water equations with 3 DOF per grid point (h, hu, hv).\n\n"
  3:                      "Usage modes:\n"
  4:                      "  1. LETKF data assimilation:\n"
  5:                      "     ./ex4 -steps 100 -nx 41 -ny 41 -petscda_type letkf -ensemble_size 30\n"
  6:                      "  2. Verification mode (compare to analytic solution):\n"
  7:                      "     ./ex4 -verification_mode -steps 500 -nx 80 -ny 80 -verification_freq 50\n"
  8:                      "  3. Combined mode:\n"
  9:                      "     ./ex4 -steps 100 -ensemble_size 20 -petscda_type letkf -enable_verification\n\n";

 11: #include <petscda.h>
 12: #include <petscdmda.h>
 13: #include <petscts.h>

 15: /* Default parameter values */
 16: #define DEFAULT_NX            40
 17: #define DEFAULT_NY            40
 18: #define DEFAULT_STEPS         100
 19: #define DEFAULT_OBS_FREQ      5
 20: #define DEFAULT_RANDOM_SEED   12345
 21: #define DEFAULT_G             9.81
 22: #define DEFAULT_DT            0.02
 23: #define DEFAULT_LX            80.0
 24: #define DEFAULT_LY            80.0
 25: #define DEFAULT_H0            1.5
 26: #define DEFAULT_AX            0.2
 27: #define DEFAULT_AY            0.2
 28: #define DEFAULT_OBS_ERROR_STD 0.01
 29: #define DEFAULT_ENSEMBLE_SIZE 30
 30: #define DEFAULT_PROGRESS_FREQ 10
 31: #define DEFAULT_OBS_STRIDE    2
 32: #define SPINUP_STEPS          0

 34: /* Minimum valid parameter values */
 35: #define MIN_ENSEMBLE_SIZE 2
 36: #define MIN_OBS_FREQ      1

 38: /* Flux scheme types */
 39: typedef enum {
 40:   EX4_FLUX_RUSANOV,
 41:   EX4_FLUX_MC
 42: } Ex4FluxType;

 44: static const char *const Ex4FluxTypes[] = {"rusanov", "mc", "Ex4FluxType", "EX4_FLUX_", NULL};

 46: typedef struct {
 47:   DM          da;        /* 2D periodic DMDA for state */
 48:   PetscInt    nx, ny;    /* Grid dimensions */
 49:   PetscReal   Lx, Ly;    /* Domain size */
 50:   PetscReal   dx, dy;    /* Grid spacing */
 51:   PetscReal   g;         /* Gravity */
 52:   PetscReal   dt;        /* Time step */
 53:   TS          ts;        /* Time stepper */
 54:   PetscReal   h0;        /* Mean height */
 55:   PetscReal   Ax, Ay;    /* Wave amplitudes */
 56:   Ex4FluxType flux_type; /* Flux scheme */
 57: } ShallowWater2DCtx;

 59: /*
 60:   ComputeFluxX - Compute physical flux in x-direction for shallow water
 61: */
 62: static void ComputeFluxX(PetscReal g, PetscReal h, PetscReal hu, PetscReal hv, PetscReal *F_h, PetscReal *F_hu, PetscReal *F_hv, PetscReal *u, PetscReal *c)
 63: {
 64:   if (h > 1e-10) {
 65:     *u    = hu / h;
 66:     *c    = PetscSqrtReal(g * h);
 67:     *F_h  = hu;
 68:     *F_hu = hu * *u + 0.5 * g * h * h;
 69:     *F_hv = hu * (hv / h);
 70:   } else {
 71:     *u    = 0.0;
 72:     *c    = 0.0;
 73:     *F_h  = 0.0;
 74:     *F_hu = 0.0;
 75:     *F_hv = 0.0;
 76:   }
 77: }

 79: /*
 80:   ComputeFluxY - Compute physical flux in y-direction for shallow water
 81: */
 82: static void ComputeFluxY(PetscReal g, PetscReal h, PetscReal hu, PetscReal hv, PetscReal *G_h, PetscReal *G_hu, PetscReal *G_hv, PetscReal *v, PetscReal *c)
 83: {
 84:   if (h > 1e-10) {
 85:     *v    = hv / h;
 86:     *c    = PetscSqrtReal(g * h);
 87:     *G_h  = hv;
 88:     *G_hu = hv * (hu / h);
 89:     *G_hv = hv * *v + 0.5 * g * h * h;
 90:   } else {
 91:     *v    = 0.0;
 92:     *c    = 0.0;
 93:     *G_h  = 0.0;
 94:     *G_hu = 0.0;
 95:     *G_hv = 0.0;
 96:   }
 97: }

 99: /*
100:   ShallowWaterRHS2D - Compute the right-hand side of the 2D shallow water equations
101: */
102: static PetscErrorCode ShallowWaterRHS2D(TS ts, PetscReal t, Vec X, Vec F_vec, PetscCtx ctx)
103: {
104:   ShallowWater2DCtx   *sw = (ShallowWater2DCtx *)ctx;
105:   Vec                  X_local;
106:   const PetscScalar ***x;
107:   PetscScalar       ***f;
108:   PetscInt             xs, ys, xm, ym, i, j;

110:   PetscFunctionBeginUser;
111:   (void)ts;
112:   (void)t;

114:   PetscCall(DMDAGetCorners(sw->da, &xs, &ys, NULL, &xm, &ym, NULL));
115:   PetscCall(DMGetLocalVector(sw->da, &X_local));
116:   PetscCall(DMGlobalToLocalBegin(sw->da, X, INSERT_VALUES, X_local));
117:   PetscCall(DMGlobalToLocalEnd(sw->da, X, INSERT_VALUES, X_local));
118:   PetscCall(DMDAVecGetArrayDOFRead(sw->da, X_local, (void *)&x));
119:   PetscCall(DMDAVecGetArrayDOF(sw->da, F_vec, &f));

121:   if (sw->flux_type == EX4_FLUX_RUSANOV) {
122:     /* First-order Rusanov (Local Lax-Friedrichs) scheme */
123:     for (j = ys; j < ys + ym; j++) {
124:       for (i = xs; i < xs + xm; i++) {
125:         PetscReal h      = PetscRealPart(x[j][i][0]);
126:         PetscReal hu     = PetscRealPart(x[j][i][1]);
127:         PetscReal hv     = PetscRealPart(x[j][i][2]);
128:         PetscReal h_im1  = PetscRealPart(x[j][i - 1][0]);
129:         PetscReal hu_im1 = PetscRealPart(x[j][i - 1][1]);
130:         PetscReal hv_im1 = PetscRealPart(x[j][i - 1][2]);
131:         PetscReal h_ip1  = PetscRealPart(x[j][i + 1][0]);
132:         PetscReal hu_ip1 = PetscRealPart(x[j][i + 1][1]);
133:         PetscReal hv_ip1 = PetscRealPart(x[j][i + 1][2]);
134:         PetscReal h_jm1  = PetscRealPart(x[j - 1][i][0]);
135:         PetscReal hu_jm1 = PetscRealPart(x[j - 1][i][1]);
136:         PetscReal hv_jm1 = PetscRealPart(x[j - 1][i][2]);
137:         PetscReal h_jp1  = PetscRealPart(x[j + 1][i][0]);
138:         PetscReal hu_jp1 = PetscRealPart(x[j + 1][i][1]);
139:         PetscReal hv_jp1 = PetscRealPart(x[j + 1][i][2]);

141:         /* X-direction fluxes */
142:         PetscReal F_h_i, F_hu_i, F_hv_i, u, c;
143:         PetscReal F_h_im1, F_hu_im1, F_hv_im1, u_im1, c_im1;
144:         PetscReal F_h_ip1, F_hu_ip1, F_hv_ip1, u_ip1, c_ip1;

146:         ComputeFluxX(sw->g, h, hu, hv, &F_h_i, &F_hu_i, &F_hv_i, &u, &c);
147:         ComputeFluxX(sw->g, h_im1, hu_im1, hv_im1, &F_h_im1, &F_hu_im1, &F_hv_im1, &u_im1, &c_im1);
148:         ComputeFluxX(sw->g, h_ip1, hu_ip1, hv_ip1, &F_h_ip1, &F_hu_ip1, &F_hv_ip1, &u_ip1, &c_ip1);

150:         PetscReal alpha_left  = PetscMax(PetscAbsReal(u_im1) + c_im1, PetscAbsReal(u) + c);
151:         PetscReal alpha_right = PetscMax(PetscAbsReal(u) + c, PetscAbsReal(u_ip1) + c_ip1);

153:         PetscReal flux_h_left  = 0.5 * (F_h_im1 + F_h_i - alpha_left * (h - h_im1));
154:         PetscReal flux_hu_left = 0.5 * (F_hu_im1 + F_hu_i - alpha_left * (hu - hu_im1));
155:         PetscReal flux_hv_left = 0.5 * (F_hv_im1 + F_hv_i - alpha_left * (hv - hv_im1));

157:         PetscReal flux_h_right  = 0.5 * (F_h_i + F_h_ip1 - alpha_right * (h_ip1 - h));
158:         PetscReal flux_hu_right = 0.5 * (F_hu_i + F_hu_ip1 - alpha_right * (hu_ip1 - hu));
159:         PetscReal flux_hv_right = 0.5 * (F_hv_i + F_hv_ip1 - alpha_right * (hv_ip1 - hv));

161:         /* Y-direction fluxes */
162:         PetscReal G_h_j, G_hu_j, G_hv_j, v, c_y;
163:         PetscReal G_h_jm1, G_hu_jm1, G_hv_jm1, v_jm1, c_jm1;
164:         PetscReal G_h_jp1, G_hu_jp1, G_hv_jp1, v_jp1, c_jp1;

166:         ComputeFluxY(sw->g, h, hu, hv, &G_h_j, &G_hu_j, &G_hv_j, &v, &c_y);
167:         ComputeFluxY(sw->g, h_jm1, hu_jm1, hv_jm1, &G_h_jm1, &G_hu_jm1, &G_hv_jm1, &v_jm1, &c_jm1);
168:         ComputeFluxY(sw->g, h_jp1, hu_jp1, hv_jp1, &G_h_jp1, &G_hu_jp1, &G_hv_jp1, &v_jp1, &c_jp1);

170:         PetscReal beta_bottom = PetscMax(PetscAbsReal(v_jm1) + c_jm1, PetscAbsReal(v) + c_y);
171:         PetscReal beta_top    = PetscMax(PetscAbsReal(v) + c_y, PetscAbsReal(v_jp1) + c_jp1);

173:         PetscReal flux_h_bottom  = 0.5 * (G_h_jm1 + G_h_j - beta_bottom * (h - h_jm1));
174:         PetscReal flux_hu_bottom = 0.5 * (G_hu_jm1 + G_hu_j - beta_bottom * (hu - hu_jm1));
175:         PetscReal flux_hv_bottom = 0.5 * (G_hv_jm1 + G_hv_j - beta_bottom * (hv - hv_jm1));

177:         PetscReal flux_h_top  = 0.5 * (G_h_j + G_h_jp1 - beta_top * (h_jp1 - h));
178:         PetscReal flux_hu_top = 0.5 * (G_hu_j + G_hu_jp1 - beta_top * (hu_jp1 - hu));
179:         PetscReal flux_hv_top = 0.5 * (G_hv_j + G_hv_jp1 - beta_top * (hv_jp1 - hv));

181:         /* Update RHS using finite volume method */
182:         f[j][i][0] = -(flux_h_right - flux_h_left) / sw->dx - (flux_h_top - flux_h_bottom) / sw->dy;
183:         f[j][i][1] = -(flux_hu_right - flux_hu_left) / sw->dx - (flux_hu_top - flux_hu_bottom) / sw->dy;
184:         f[j][i][2] = -(flux_hv_right - flux_hv_left) / sw->dx - (flux_hv_top - flux_hv_bottom) / sw->dy;
185:       }
186:     }
187:   } else {
188:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MC limiter not yet implemented for 2D");
189:   }

191:   PetscCall(DMDAVecRestoreArrayDOFRead(sw->da, X_local, (void *)&x));
192:   PetscCall(DMDAVecRestoreArrayDOF(sw->da, F_vec, &f));
193:   PetscCall(DMRestoreLocalVector(sw->da, &X_local));
194:   PetscFunctionReturn(PETSC_SUCCESS);
195: }

197: /*
198:   ShallowWater2DContextCreate - Create and initialize a 2D shallow water context
199: */
200: static PetscErrorCode ShallowWater2DContextCreate(DM da, PetscInt nx, PetscInt ny, PetscReal Lx, PetscReal Ly, PetscReal g, PetscReal dt, PetscReal h0, PetscReal Ax, PetscReal Ay, Ex4FluxType flux_type, ShallowWater2DCtx **ctx)
201: {
202:   ShallowWater2DCtx *sw;

204:   PetscFunctionBeginUser;
205:   PetscCall(PetscNew(&sw));
206:   sw->da        = da;
207:   sw->nx        = nx;
208:   sw->ny        = ny;
209:   sw->Lx        = Lx;
210:   sw->Ly        = Ly;
211:   sw->g         = g;
212:   sw->dx        = Lx / nx;
213:   sw->dy        = Ly / ny;
214:   sw->dt        = dt;
215:   sw->h0        = h0;
216:   sw->Ax        = Ax;
217:   sw->Ay        = Ay;
218:   sw->flux_type = flux_type;

220:   PetscCall(TSCreate(PetscObjectComm((PetscObject)da), &sw->ts));
221:   PetscCall(TSSetProblemType(sw->ts, TS_NONLINEAR));
222:   PetscCall(TSSetRHSFunction(sw->ts, NULL, ShallowWaterRHS2D, sw));
223:   PetscCall(TSSetType(sw->ts, TSRK));
224:   PetscCall(TSRKSetType(sw->ts, TSRK4));
225:   PetscCall(TSSetTimeStep(sw->ts, dt));
226:   PetscCall(TSSetMaxSteps(sw->ts, 1));
227:   PetscCall(TSSetMaxTime(sw->ts, dt));
228:   PetscCall(TSSetExactFinalTime(sw->ts, TS_EXACTFINALTIME_MATCHSTEP));
229:   PetscCall(TSSetFromOptions(sw->ts));

231:   *ctx = sw;
232:   PetscFunctionReturn(PETSC_SUCCESS);
233: }

235: /*
236:   ShallowWater2DContextDestroy - Destroy a 2D shallow water context
237: */
238: static PetscErrorCode ShallowWater2DContextDestroy(ShallowWater2DCtx **ctx)
239: {
240:   PetscFunctionBeginUser;
241:   if (!ctx || !*ctx) PetscFunctionReturn(PETSC_SUCCESS);
242:   PetscCall(TSDestroy(&(*ctx)->ts));
243:   PetscCall(PetscFree(*ctx));
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }

247: /*
248:   ShallowWaterStep2D - Advance state vector one time step
249: */
250: static PetscErrorCode ShallowWaterStep2D(Vec input, Vec output, PetscCtx ctx)
251: {
252:   ShallowWater2DCtx *sw = (ShallowWater2DCtx *)ctx;

254:   PetscFunctionBeginUser;
255:   if (input != output) PetscCall(VecCopy(input, output));

257:   PetscCall(TSSetTime(sw->ts, 0.0));
258:   PetscCall(TSSetStepNumber(sw->ts, 0));
259:   PetscCall(TSSetMaxTime(sw->ts, sw->dt));
260:   PetscCall(TSSolve(sw->ts, output));
261:   PetscFunctionReturn(PETSC_SUCCESS);
262: }

264: /*
265:   ShallowWaterSolution_Wave2D - Analytic 2D traveling wave solution
266: */
267: static PetscErrorCode ShallowWaterSolution_Wave2D(PetscReal Lx, PetscReal Ly, PetscReal x, PetscReal y, PetscReal t, PetscReal g, PetscReal h0, PetscReal Ax, PetscReal Ay, PetscReal *h, PetscReal *hu, PetscReal *hv)
268: {
269:   PetscReal kx, ky, omega_x, omega_y, c;

271:   PetscFunctionBeginUser;
272:   /* Wave parameters */
273:   c       = PetscSqrtReal(g * h0);
274:   kx      = 2.0 * PETSC_PI / Lx;
275:   ky      = 2.0 * PETSC_PI / Ly;
276:   omega_x = c * kx;
277:   omega_y = c * ky;

279:   /* Height field: superposition of waves in x and y */
280:   PetscReal h_pert_x = Ax * PetscSinReal(kx * x - omega_x * t);
281:   PetscReal h_pert_y = Ay * PetscSinReal(ky * y - omega_y * t);
282:   *h                 = h0 + h_pert_x + h_pert_y;

284:   /* Velocity fields (linearized) */
285:   PetscReal u = (c / h0) * Ax * PetscCosReal(kx * x - omega_x * t);
286:   PetscReal v = (c / h0) * Ay * PetscCosReal(ky * y - omega_y * t);

288:   *hu = (*h) * u;
289:   *hv = (*h) * v;
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: /*
294:   ComputeAnalyticError - Compute error between numerical solution and analytic traveling wave

296:   Computes L1, L2, and Linf norms of the error in the height field
297: */
298: static PetscErrorCode ComputeAnalyticError(Vec numerical, DM da, PetscReal time, PetscReal Lx, PetscReal Ly, PetscReal g, PetscReal h0, PetscReal Ax, PetscReal Ay, PetscReal *L1_error, PetscReal *L2_error, PetscReal *Linf_error)
299: {
300:   const PetscScalar ***x_num;
301:   PetscInt             xs, ys, xm, ym, i, j;
302:   PetscReal            dx, dy, dA;
303:   PetscReal            L1_local = 0.0, L2_local = 0.0, Linf_local = 0.0;
304:   PetscInt             nx, ny;

306:   PetscFunctionBeginUser;
307:   PetscCall(DMDAGetInfo(da, NULL, &nx, &ny, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
308:   dx = Lx / nx;
309:   dy = Ly / ny;
310:   dA = dx * dy;

312:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL));
313:   PetscCall(DMDAVecGetArrayDOFRead(da, numerical, (void *)&x_num));

315:   for (j = ys; j < ys + ym; j++) {
316:     for (i = xs; i < xs + xm; i++) {
317:       PetscReal x = ((PetscReal)i + 0.5) * dx;
318:       PetscReal y = ((PetscReal)j + 0.5) * dy;
319:       PetscReal h_exact, hu_exact, hv_exact;
320:       PetscReal h_num = PetscRealPart(x_num[j][i][0]);

322:       /* Compute analytic solution at this point */
323:       PetscCall(ShallowWaterSolution_Wave2D(Lx, Ly, x, y, time, g, h0, Ax, Ay, &h_exact, &hu_exact, &hv_exact));

325:       /* Compute pointwise error */
326:       PetscReal error = PetscAbsReal(h_num - h_exact);

328:       /* Accumulate norms */
329:       L1_local += error * dA;
330:       L2_local += error * error * dA;
331:       Linf_local = PetscMax(Linf_local, error);
332:     }
333:   }

335:   PetscCall(DMDAVecRestoreArrayDOFRead(da, numerical, (void *)&x_num));

337:   /* Global reduction for L1 and L2 norms */
338:   PetscCallMPI(MPIU_Allreduce(&L1_local, L1_error, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)da)));
339:   PetscCallMPI(MPIU_Allreduce(&L2_local, L2_error, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)da)));
340:   *L2_error = PetscSqrtReal(*L2_error);

342:   /* Global reduction for Linf norm */
343:   PetscCallMPI(MPIU_Allreduce(&Linf_local, Linf_error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
344:   PetscFunctionReturn(PETSC_SUCCESS);
345: }

347: /*
348:   CreateObservationMatrix2D - Create observation matrix H for 2D shallow water

350:   Observes water height (h) at every obs_stride-th grid point in both x and y directions.
351: */
352: static PetscErrorCode CreateObservationMatrix2D(PetscInt nx, PetscInt ny, PetscInt ndof, PetscInt obs_stride, Vec state, Mat *H, Mat *H1, PetscInt *nobs_out)
353: {
354:   PetscInt i, j, obs_idx, local_state_size;
355:   PetscInt nobs_x, nobs_y, nobs;
356:   PetscInt rstart, rend;

358:   PetscFunctionBeginUser;
359:   /* Calculate number of observations */
360:   nobs_x = (nx + obs_stride - 1) / obs_stride;
361:   nobs_y = (ny + obs_stride - 1) / obs_stride;
362:   nobs   = nobs_x * nobs_y;

364:   PetscCall(VecGetLocalSize(state, &local_state_size));

366:   /* Create observation matrix H (nobs x nx*ny*ndof) */
367:   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, local_state_size, nobs, nx * ny * ndof, 1, NULL, 1, NULL, H));
368:   PetscCall(MatSetFromOptions(*H));

370:   /* Create H1 for scalar field (nobs x nx*ny) */
371:   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, local_state_size / ndof, nobs, nx * ny, 1, NULL, 1, NULL, H1));
372:   PetscCall(MatSetFromOptions(*H1));

374:   /* Get row ownership range for local process */
375:   PetscCall(MatGetOwnershipRange(*H, &rstart, &rend));

377:   /* Observe water height (h) at sparse grid locations - only set local rows */
378:   obs_idx = 0;
379:   for (j = 0; j < ny; j += obs_stride) {
380:     for (i = 0; i < nx; i += obs_stride) {
381:       if (obs_idx >= rstart && obs_idx < rend) {
382:         PetscInt grid_idx = j * nx + i;
383:         /* H1: select grid point */
384:         PetscCall(MatSetValue(*H1, obs_idx, grid_idx, 1.0, INSERT_VALUES));
385:         /* H: select h component (first DOF) at that grid point */
386:         PetscCall(MatSetValue(*H, obs_idx, grid_idx * ndof, 1.0, INSERT_VALUES));
387:       }
388:       obs_idx++;
389:     }
390:   }

392:   PetscCall(MatAssemblyBegin(*H, MAT_FINAL_ASSEMBLY));
393:   PetscCall(MatAssemblyEnd(*H, MAT_FINAL_ASSEMBLY));
394:   PetscCall(MatAssemblyBegin(*H1, MAT_FINAL_ASSEMBLY));
395:   PetscCall(MatAssemblyEnd(*H1, MAT_FINAL_ASSEMBLY));

397:   PetscCall(MatViewFromOptions(*H1, NULL, "-H_view"));
398:   *nobs_out = nobs;
399:   PetscFunctionReturn(PETSC_SUCCESS);
400: }

402: /*
403:   CreateLocalizationMatrix2D - Create localization matrix Q for 2D shallow water
404: */
405: static PetscErrorCode CreateLocalizationMatrix2D(PetscInt nx, PetscInt ny, PetscInt nobs, Mat *Q)
406: {
407:   PetscInt i, j;

409:   PetscFunctionBeginUser;
410:   /* Create Q matrix (nx*ny x nobs) - global/no localization for simplicity */
411:   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, nx * ny, nobs, nobs, NULL, 0, NULL, Q));
412:   PetscCall(MatSetFromOptions(*Q));

414:   /* Initialize with no localization: each state variable uses all observations */
415:   for (i = 0; i < nx * ny; i++) {
416:     for (j = 0; j < nobs; j++) PetscCall(MatSetValue(*Q, i, j, 1.0, INSERT_VALUES));
417:   }
418:   PetscCall(MatAssemblyBegin(*Q, MAT_FINAL_ASSEMBLY));
419:   PetscCall(MatAssemblyEnd(*Q, MAT_FINAL_ASSEMBLY));
420:   PetscFunctionReturn(PETSC_SUCCESS);
421: }

423: /*
424:   ComputeRMSE - Compute root mean square error between two vectors
425: */
426: static PetscErrorCode ComputeRMSE(Vec v1, Vec v2, Vec work, PetscInt n, PetscReal *rmse)
427: {
428:   PetscReal norm;

430:   PetscFunctionBeginUser;
431:   PetscCall(VecWAXPY(work, -1.0, v2, v1));
432:   PetscCall(VecNorm(work, NORM_2, &norm));
433:   *rmse = norm / PetscSqrtReal((PetscReal)n);
434:   PetscFunctionReturn(PETSC_SUCCESS);
435: }

437: /*
438:   ValidateParameters - Validate input parameters
439: */
440: static PetscErrorCode ValidateParameters(PetscInt *nx, PetscInt *ny, PetscInt *nobs, PetscInt *steps, PetscInt *obs_freq, PetscInt *ensemble_size, PetscReal *dt, PetscReal *g, PetscReal *obs_error_std)
441: {
442:   PetscFunctionBeginUser;
443:   PetscCheck(*nx > 0 && *ny > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Grid dimensions must be positive");
444:   PetscCheck(*steps >= 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Number of steps must be non-negative");
445:   PetscCheck(*ensemble_size >= MIN_ENSEMBLE_SIZE, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Ensemble size must be at least %d", MIN_ENSEMBLE_SIZE);

447:   if (*obs_freq < MIN_OBS_FREQ) {
448:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Observation frequency adjusted from %" PetscInt_FMT " to %d\n", *obs_freq, MIN_OBS_FREQ));
449:     *obs_freq = MIN_OBS_FREQ;
450:   }
451:   if (*obs_freq > *steps && *steps > 0) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Observation frequency > total steps, no observations will be assimilated.\n"));

453:   PetscCheck(*dt > 0.0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Time step must be positive");
454:   PetscCheck(*obs_error_std > 0.0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Observation error std must be positive");
455:   PetscCheck(PetscIsNormalReal(*g), PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Gravitational constant must be a normal real number");
456:   PetscFunctionReturn(PETSC_SUCCESS);
457: }

459: int main(int argc, char **argv)
460: {
461:   /* Configuration parameters */
462:   const PetscInt ndof                    = 3; /* h, hu, hv */
463:   PetscInt       nx                      = DEFAULT_NX;
464:   PetscInt       ny                      = DEFAULT_NY;
465:   PetscInt       steps                   = DEFAULT_STEPS;
466:   PetscInt       obs_freq                = DEFAULT_OBS_FREQ;
467:   PetscInt       random_seed             = DEFAULT_RANDOM_SEED;
468:   PetscInt       ensemble_size           = DEFAULT_ENSEMBLE_SIZE;
469:   PetscInt       n_spin                  = SPINUP_STEPS;
470:   PetscInt       progress_freq           = DEFAULT_PROGRESS_FREQ;
471:   PetscInt       obs_stride              = DEFAULT_OBS_STRIDE;
472:   PetscReal      g                       = DEFAULT_G;
473:   PetscReal      dt                      = DEFAULT_DT;
474:   PetscReal      Lx                      = DEFAULT_LX;
475:   PetscReal      Ly                      = DEFAULT_LY;
476:   PetscReal      h0                      = DEFAULT_H0;
477:   PetscReal      Ax                      = DEFAULT_AX;
478:   PetscReal      Ay                      = DEFAULT_AY;
479:   PetscReal      obs_error_std           = DEFAULT_OBS_ERROR_STD;
480:   PetscBool      use_fake_localization   = PETSC_FALSE;
481:   PetscInt       num_observations_vertex = 7;
482:   Ex4FluxType    flux_type               = EX4_FLUX_RUSANOV;
483:   char           output_file[PETSC_MAX_PATH_LEN];
484:   PetscBool      output_enabled      = PETSC_FALSE;
485:   PetscBool      verification_mode   = PETSC_FALSE;
486:   PetscBool      enable_verification = PETSC_FALSE;
487:   PetscInt       verification_freq   = 10;
488:   FILE          *fp                  = NULL;

490:   /* PETSc objects */
491:   ShallowWater2DCtx *sw_ctx = NULL;
492:   DM                 da_state;
493:   PetscDA            da;
494:   Vec                x0, x_mean, x_forecast;
495:   Vec                truth_state, rmse_work;
496:   Vec                observation, obs_noise, obs_error_var;
497:   PetscRandom        rng;
498:   Mat                Q = NULL, H = NULL, H1 = NULL;
499:   PetscInt           nobs;

501:   /* Statistics tracking */
502:   PetscReal rmse_forecast = 0.0, rmse_analysis = 0.0;
503:   PetscReal sum_rmse_forecast = 0.0, sum_rmse_analysis = 0.0;
504:   PetscInt  n_stat_steps = 0;
505:   PetscInt  obs_count    = 0;
506:   PetscInt  step;

508:   PetscFunctionBeginUser;
509:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));

511:   /* Parse command-line options */
512:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "2D Shallow Water LETKF Example", NULL);
513:   PetscCall(PetscOptionsInt("-nx", "Number of grid points in x", "", nx, &nx, NULL));
514:   PetscCall(PetscOptionsInt("-ny", "Number of grid points in y", "", ny, &ny, NULL));
515:   PetscCall(PetscOptionsInt("-steps", "Number of time steps", "", steps, &steps, NULL));
516:   PetscCall(PetscOptionsInt("-obs_freq", "Observation frequency", "", obs_freq, &obs_freq, NULL));
517:   PetscCall(PetscOptionsInt("-obs_stride", "Observation stride (sample every Nth grid point)", "", obs_stride, &obs_stride, NULL));
518:   PetscCall(PetscOptionsReal("-g", "Gravitational constant", "", g, &g, NULL));
519:   PetscCall(PetscOptionsReal("-dt", "Time step size", "", dt, &dt, NULL));
520:   PetscCall(PetscOptionsReal("-Lx", "Domain length in x", "", Lx, &Lx, NULL));
521:   PetscCall(PetscOptionsReal("-Ly", "Domain length in y", "", Ly, &Ly, NULL));
522:   PetscCall(PetscOptionsReal("-h0", "Mean water height", "", h0, &h0, NULL));
523:   PetscCall(PetscOptionsReal("-Ax", "Wave amplitude in x", "", Ax, &Ax, NULL));
524:   PetscCall(PetscOptionsReal("-Ay", "Wave amplitude in y", "", Ay, &Ay, NULL));
525:   PetscCall(PetscOptionsReal("-obs_error", "Observation error standard deviation", "", obs_error_std, &obs_error_std, NULL));
526:   PetscCall(PetscOptionsInt("-random_seed", "Random seed for ensemble perturbations", "", random_seed, &random_seed, NULL));
527:   PetscCall(PetscOptionsInt("-progress_freq", "Print progress every N steps (0 = only first/last)", "", progress_freq, &progress_freq, NULL));
528:   PetscCall(PetscOptionsString("-output_file", "Output file for visualization data", "", "", output_file, sizeof(output_file), &output_enabled));
529:   PetscCall(PetscOptionsEnum("-ex4_flux", "Flux scheme (rusanov/mc)", "", Ex4FluxTypes, (PetscEnum)flux_type, (PetscEnum *)&flux_type, NULL));
530:   PetscCall(PetscOptionsBool("-use_fake_localization", "Use fake localization matrix", "", use_fake_localization, &use_fake_localization, NULL));
531:   if (!use_fake_localization) PetscCall(PetscOptionsInt("-petscda_letkf_obs_per_vertex", "Number of observations per vertex", "", num_observations_vertex, &num_observations_vertex, NULL));
532:   PetscCall(PetscOptionsBool("-verification_mode", "Run in pure verification mode (no DA)", "", verification_mode, &verification_mode, NULL));
533:   PetscCall(PetscOptionsBool("-enable_verification", "Enable verification alongside DA", "", enable_verification, &enable_verification, NULL));
534:   PetscCall(PetscOptionsInt("-verification_freq", "Frequency for verification error output", "", verification_freq, &verification_freq, NULL));
535:   PetscOptionsEnd();

537:   /* Handle verification mode settings */
538:   if (verification_mode) {
539:     /* In pure verification mode, disable all DA components */
540:     enable_verification = PETSC_TRUE;
541:     ensemble_size       = 0; /* Will skip DA setup */
542:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Running in pure verification mode (no data assimilation)\n"));
543:   }

545:   /* Calculate number of observations */
546:   nobs = ((nx + obs_stride - 1) / obs_stride) * ((ny + obs_stride - 1) / obs_stride);

548:   /* Set num_observations_vertex for fake localization after nobs is calculated */
549:   if (use_fake_localization) num_observations_vertex = nobs;

551:   /* Validate parameters - skip ensemble size check in verification mode */
552:   if (!verification_mode) {
553:     PetscCall(ValidateParameters(&nx, &ny, &nobs, &steps, &obs_freq, &ensemble_size, &dt, &g, &obs_error_std));
554:   } else {
555:     PetscCheck(nx > 0 && ny > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Grid dimensions must be positive");
556:     PetscCheck(steps >= 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Number of steps must be non-negative");
557:     PetscCheck(dt > 0.0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Time step must be positive");
558:     PetscCheck(PetscIsNormalReal(g), PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Gravitational constant must be a normal real number");
559:   }

561:   /* Create 2D periodic DMDA with 3 DOF (h, hu, hv) */
562:   PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_STAR, nx, ny, PETSC_DECIDE, PETSC_DECIDE, ndof, 2, NULL, NULL, &da_state));
563:   PetscCall(DMSetFromOptions(da_state));
564:   PetscCall(DMSetUp(da_state));

566:   /* Create shallow water context */
567:   PetscCall(ShallowWater2DContextCreate(da_state, nx, ny, Lx, Ly, g, dt, h0, Ax, Ay, flux_type, &sw_ctx));

569:   /* Initialize random number generator */
570:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rng));
571:   {
572:     PetscMPIInt rank;
573:     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
574:     PetscCall(PetscRandomSetSeed(rng, (unsigned long)(random_seed + rank)));
575:   }
576:   PetscCall(PetscRandomSetFromOptions(rng));
577:   PetscCall(PetscRandomSeed(rng));

579:   /* Initialize state vectors */
580:   PetscCall(DMCreateGlobalVector(da_state, &x0));

582:   /* Set initial condition from analytic solution */
583:   {
584:     PetscScalar ***x_array;
585:     PetscInt       xs, ys, xm, ym, i, j;
586:     PetscCall(DMDAGetCorners(da_state, &xs, &ys, NULL, &xm, &ym, NULL));
587:     PetscCall(DMDAVecGetArrayDOF(da_state, x0, &x_array));
588:     for (j = ys; j < ys + ym; j++) {
589:       for (i = xs; i < xs + xm; i++) {
590:         PetscReal x = ((PetscReal)i + 0.5) * sw_ctx->dx;
591:         PetscReal y = ((PetscReal)j + 0.5) * sw_ctx->dy;
592:         PetscReal h, hu, hv;
593:         PetscCall(ShallowWaterSolution_Wave2D(Lx, Ly, x, y, 0.0, g, h0, Ax, Ay, &h, &hu, &hv));
594:         x_array[j][i][0] = h;
595:         x_array[j][i][1] = hu;
596:         x_array[j][i][2] = hv;
597:       }
598:     }
599:     PetscCall(DMDAVecRestoreArrayDOF(da_state, x0, &x_array));
600:   }

602:   /* Initialize truth trajectory */
603:   PetscCall(VecDuplicate(x0, &truth_state));
604:   PetscCall(VecCopy(x0, truth_state));
605:   PetscCall(VecDuplicate(x0, &rmse_work));

607:   /* Spinup if needed */
608:   if (n_spin > 0) {
609:     PetscInt spinup_progress_interval = (n_spin >= 10) ? (n_spin / 10) : 1;
610:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Spinning up truth trajectory for %" PetscInt_FMT " steps...\n", n_spin));
611:     for (PetscInt k = 0; k < n_spin; k++) {
612:       PetscCall(ShallowWaterStep2D(truth_state, truth_state, sw_ctx));
613:       if ((k + 1) % spinup_progress_interval == 0 || (k + 1) == n_spin) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Spinup progress: %" PetscInt_FMT "/%" PetscInt_FMT "\n", k + 1, n_spin));
614:     }
615:     PetscCall(VecCopy(truth_state, x0));
616:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Spinup complete.\n\n"));
617:   }

619:   /* Setup data assimilation (skip in pure verification mode) */
620:   if (!verification_mode) {
621:     /* Create observation matrix H */
622:     PetscCall(CreateObservationMatrix2D(nx, ny, ndof, obs_stride, x0, &H, &H1, &nobs));

624:     /* Initialize observation vectors */
625:     PetscCall(MatCreateVecs(H, NULL, &observation));
626:     PetscCall(VecDuplicate(observation, &obs_noise));
627:     PetscCall(VecDuplicate(observation, &obs_error_var));
628:     PetscCall(VecSet(obs_error_var, obs_error_std * obs_error_std));

630:     /* Create and configure PetscDA for ensemble data assimilation */
631:     PetscCall(PetscDACreate(PETSC_COMM_WORLD, &da));
632:     PetscCall(PetscDASetSizes(da, nx * ny * ndof, nobs));
633:     PetscCall(PetscDAEnsembleSetSize(da, ensemble_size));
634:     {
635:       PetscInt local_state_size, local_obs_size;
636:       PetscCall(VecGetLocalSize(x0, &local_state_size));
637:       PetscCall(VecGetLocalSize(observation, &local_obs_size));
638:       PetscCall(PetscDASetLocalSizes(da, local_state_size, local_obs_size));
639:     }
640:     PetscCall(PetscDASetNDOF(da, ndof));
641:     PetscCall(PetscDASetFromOptions(da));
642:     PetscCall(PetscDAEnsembleGetSize(da, &ensemble_size));
643:     PetscCall(PetscDASetUp(da));

645:     /* Initialize ensemble statistics vectors */
646:     PetscCall(VecDuplicate(x0, &x_mean));
647:     PetscCall(VecDuplicate(x0, &x_forecast));

649:     /* Set observation error variance */
650:     PetscCall(PetscDASetObsErrorVariance(da, obs_error_var));

652:     /* Create and set localization matrix Q */
653:     {
654:       PetscBool isletkf;
655:       PetscCall(PetscObjectTypeCompare((PetscObject)da, PETSCDALETKF, &isletkf));

657:       if (!use_fake_localization && isletkf) {
658:         /* Use PetscDALETKFGetLocalizationMatrix for proper distance-based localization */
659:         Vec       Vecxyz[3] = {NULL, NULL, NULL};
660:         Vec       coord;
661:         DM        cda;
662:         PetscReal bd[3] = {Lx, Ly, 0};
663:         PetscInt  cdof;

665:         /* Set up coordinates for DMDA */
666:         PetscCall(DMDASetUniformCoordinates(da_state, 0.0, Lx, 0.0, Ly, 0.0, 0.0));

668:         /* Get coordinate DM and coordinates */
669:         PetscCall(DMGetCoordinateDM(da_state, &cda));
670:         PetscCall(DMGetCoordinates(da_state, &coord));
671:         PetscCall(DMGetBlockSize(cda, &cdof));

673:         /* Extract x and y coordinates into separate vectors for each grid point */
674:         /* Need vectors sized for nx*ny points (not nx*ny*ndof)  */
675:         PetscInt xs, ys, xm, ym;
676:         PetscCall(DMDAGetCorners(da_state, &xs, &ys, NULL, &xm, &ym, NULL));

678:         for (PetscInt d = 0; d < 2; d++) {
679:           PetscScalar ***x_coord_3d;
680:           PetscScalar   *vec_array;
681:           PetscInt       i, j, idx;
682:           PetscInt       local_grid_points = xm * ym;

684:           /* Create vector for this coordinate component - size should be nx*ny */
685:           PetscCall(VecCreate(PETSC_COMM_WORLD, &Vecxyz[d]));
686:           PetscCall(VecSetSizes(Vecxyz[d], local_grid_points, nx * ny));
687:           PetscCall(VecSetFromOptions(Vecxyz[d]));
688:           PetscCall(PetscObjectSetName((PetscObject)Vecxyz[d], d == 0 ? "x_coordinate" : "y_coordinate"));

690:           /* Get coordinate array - it's structured as [x,y] pairs */
691:           PetscCall(DMDAVecGetArrayDOFRead(cda, coord, (void *)&x_coord_3d));
692:           PetscCall(VecGetArray(Vecxyz[d], &vec_array));

694:           /* Copy coordinates from 2D array */
695:           idx = 0;
696:           for (j = ys; j < ys + ym; j++) {
697:             for (i = xs; i < xs + xm; i++) vec_array[idx++] = x_coord_3d[j][i][d];
698:           }

700:           PetscCall(VecRestoreArray(Vecxyz[d], &vec_array));
701:           PetscCall(DMDAVecRestoreArrayDOFRead(cda, coord, (void *)&x_coord_3d));
702:         }

704:         /* Get localization matrix using distance-based method */
705:         PetscCall(PetscDALETKFGetLocalizationMatrix(num_observations_vertex, 1, Vecxyz, bd, H1, &Q));
706:         PetscCall(VecDestroy(&Vecxyz[0]));
707:         PetscCall(VecDestroy(&Vecxyz[1]));
708:         PetscCall(PetscDALETKFSetObsPerVertex(da, num_observations_vertex));
709:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Using distance-based localization with %" PetscInt_FMT " observations per vertex\n", num_observations_vertex));
710:       } else {
711:         PetscCall(CreateLocalizationMatrix2D(nx, ny, nobs, &Q));
712:         if (isletkf) {
713:           PetscCall(PetscDALETKFSetObsPerVertex(da, num_observations_vertex));
714:           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Using global localization (all observations)\n"));
715:         }
716:       }
717:       PetscCall(PetscDALETKFSetLocalization(da, Q, H));
718:       PetscCall(MatViewFromOptions(Q, NULL, "-Q_view"));
719:       PetscCall(MatDestroy(&Q));
720:     }

722:     /* Initialize ensemble members with perturbations */
723:     PetscCall(PetscDAEnsembleInitialize(da, x0, obs_error_std, rng));

725:     PetscCall(PetscDAViewFromOptions(da, NULL, "-petscda_view"));
726:   }

728:   /* Print configuration summary */
729:   {
730:     const char *flux_name = (flux_type == EX4_FLUX_RUSANOV) ? "Rusanov (1st order)" : "MC (2nd order)";
731:     const char *mode_name = verification_mode ? "Verification" : "LETKF";
732:     PetscReal   dx        = Lx / nx;
733:     PetscReal   dy        = Ly / ny;
734:     PetscReal   c         = PetscSqrtReal(g * h0);
735:     PetscReal   cfl       = dt * c * (1.0 / dx + 1.0 / dy);

737:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "2D Shallow Water %s Example\n", mode_name));
738:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "==============================\n"));
739:     if (verification_mode) {
740:       PetscCall(PetscPrintf(PETSC_COMM_WORLD,
741:                             "  Mode                  : Verification (traveling wave test)\n"
742:                             "  Flux scheme           : %s\n"
743:                             "  Grid dimensions       : %" PetscInt_FMT " x %" PetscInt_FMT "\n"
744:                             "  Domain size           : %.2f x %.2f\n"
745:                             "  Grid spacing          : dx=%.4f, dy=%.4f\n"
746:                             "  Mean height (h0)      : %.4f\n"
747:                             "  Wave amplitudes       : Ax=%.4f, Ay=%.4f\n"
748:                             "  Gravitational const   : %.4f\n"
749:                             "  Wave speed (c)        : %.4f\n"
750:                             "  Time step (dt)        : %.4f\n"
751:                             "  CFL number            : %.4f\n"
752:                             "  Total steps           : %" PetscInt_FMT "\n"
753:                             "  Verification freq     : %" PetscInt_FMT "\n\n",
754:                             flux_name, nx, ny, (double)Lx, (double)Ly, (double)dx, (double)dy, (double)h0, (double)Ax, (double)Ay, (double)g, (double)c, (double)dt, (double)cfl, steps, verification_freq));
755:     } else {
756:       PetscCall(PetscPrintf(PETSC_COMM_WORLD,
757:                             "  Mode                  : Data Assimilation%s\n"
758:                             "  Flux scheme           : %s\n"
759:                             "  Grid dimensions       : %" PetscInt_FMT " x %" PetscInt_FMT "\n"
760:                             "  State dimension       : %" PetscInt_FMT " (%" PetscInt_FMT " grid points x %d DOF)\n"
761:                             "  Observation dimension : %" PetscInt_FMT "\n"
762:                             "  Observation stride    : %" PetscInt_FMT "\n"
763:                             "  Ensemble size         : %" PetscInt_FMT "\n"
764:                             "  Domain size           : %.2f x %.2f\n"
765:                             "  Grid spacing          : dx=%.4f, dy=%.4f\n"
766:                             "  Mean height (h0)      : %.4f\n"
767:                             "  Wave amplitudes       : Ax=%.4f, Ay=%.4f\n"
768:                             "  Gravitational const   : %.4f\n"
769:                             "  Wave speed (c)        : %.4f\n"
770:                             "  Time step (dt)        : %.4f\n"
771:                             "  CFL number            : %.4f\n"
772:                             "  Total steps           : %" PetscInt_FMT "\n"
773:                             "  Observation frequency : %" PetscInt_FMT "\n"
774:                             "  Observation noise std : %.3f\n"
775:                             "  Random seed           : %" PetscInt_FMT "\n",
776:                             enable_verification ? " with verification" : "", flux_name, nx, ny, nx * ny * ndof, nx * ny, (int)ndof, nobs, obs_stride, ensemble_size, (double)Lx, (double)Ly, (double)dx, (double)dy, (double)h0, (double)Ax, (double)Ay, (double)g, (double)c, (double)dt, (double)cfl, steps, obs_freq, (double)obs_error_std, random_seed));
777:       if (enable_verification) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Verification freq     : %" PetscInt_FMT "\n", verification_freq));
778:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
779:     }
780:   }

782:   /* Open output file if requested - only in serial mode */
783:   if (output_enabled) {
784:     PetscMPIInt size;
785:     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
786:     if (size > 1) {
787:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Output file generation is only supported in serial mode (currently running with %d processes)\n", (int)size));
788:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "         Disabling output file. Run with single process to enable.\n\n"));
789:       output_enabled = PETSC_FALSE;
790:       fp             = NULL;
791:     } else {
792:       PetscCall(PetscFOpen(PETSC_COMM_WORLD, output_file, "w", &fp));
793:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# 2D Shallow Water LETKF Output\n"));
794:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# nx=%d, ny=%d, ndof=%d, nobs=%d, ensemble_size=%d\n", (int)nx, (int)ny, (int)ndof, (int)nobs, (int)ensemble_size));
795:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# dt=%.6f, g=%.6f, obs_error_std=%.6f\n", (double)dt, (double)g, (double)obs_error_std));
796:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "# Format: step time [truth]x(nx*ny*ndof) [mean]x(nx*ny*ndof) [obs]x(nobs) rmse_forecast rmse_analysis\n"));
797:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Writing output to: %s\n\n", output_file));
798:     }
799:   }

801:   /* Print initial condition */
802:   if (verification_mode) {
803:     PetscReal L1_err, L2_err, Linf_err;
804:     PetscCall(ComputeAnalyticError(x0, da_state, 0.0, Lx, Ly, g, h0, Ax, Ay, &L1_err, &L2_err, &Linf_err));
805:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4d, time %6.3f  L1=%.5e  L2=%.5e  Linf=%.5e [initial]\n", 0, 0.0, (double)L1_err, (double)L2_err, (double)Linf_err));
806:   } else {
807:     PetscReal rmse_initial;
808:     PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
809:     PetscCall(ComputeRMSE(x_mean, truth_state, rmse_work, nx * ny * ndof, &rmse_initial));
810:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4d, time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f [initial]\n", 0, 0.0, (double)rmse_initial, (double)rmse_initial));

812:     if (output_enabled && fp) {
813:       const PetscScalar *truth_array, *mean_array;
814:       PetscInt           i;
815:       PetscCall(VecGetArrayRead(truth_state, &truth_array));
816:       PetscCall(VecGetArrayRead(x_mean, &mean_array));
817:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "0 0.000000"));
818:       for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(truth_array[i])));
819:       for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(mean_array[i])));
820:       for (i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));
821:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e %.8e\n", (double)rmse_initial, (double)rmse_initial));
822:       PetscCall(VecRestoreArrayRead(truth_state, &truth_array));
823:       PetscCall(VecRestoreArrayRead(x_mean, &mean_array));
824:     }
825:   }

827:   /* Main simulation loop */
828:   if (verification_mode) {
829:     /* Pure verification mode: advance and compute errors */
830:     Vec x_numerical;
831:     PetscCall(VecDuplicate(x0, &x_numerical));
832:     PetscCall(VecCopy(x0, x_numerical));

834:     /* Write initial condition to file */
835:     if (output_enabled && fp) {
836:       const PetscScalar *num_array;
837:       PetscInt           i;
838:       PetscCall(VecGetArrayRead(x_numerical, &num_array));
839:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "0 0.000000"));
840:       for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(num_array[i])));
841:       for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(num_array[i])));
842:       for (i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));
843:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " 0.0 0.0\n"));
844:       PetscCall(VecRestoreArrayRead(x_numerical, &num_array));
845:     }

847:     for (step = 1; step <= steps; step++) {
848:       PetscReal time = step * dt;

850:       /* Advance numerical solution */
851:       PetscCall(ShallowWaterStep2D(x_numerical, x_numerical, sw_ctx));

853:       /* Compute error against analytic solution */
854:       if (step % verification_freq == 0 || step == steps) {
855:         PetscReal L1_err, L2_err, Linf_err;
856:         PetscCall(ComputeAnalyticError(x_numerical, da_state, time, Lx, Ly, g, h0, Ax, Ay, &L1_err, &L2_err, &Linf_err));
857:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4" PetscInt_FMT ", time %6.3f  L1=%.5e  L2=%.5e  Linf=%.5e\n", step, (double)time, (double)L1_err, (double)L2_err, (double)Linf_err));
858:       }

860:       /* Write data to output file */
861:       if (output_enabled && fp) {
862:         const PetscScalar *num_array;
863:         PetscInt           i;
864:         PetscCall(VecGetArrayRead(x_numerical, &num_array));
865:         PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "%d %.6f", (int)step, (double)time));
866:         /* Write numerical solution as both truth and mean */
867:         for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(num_array[i])));
868:         for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(num_array[i])));
869:         for (i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));
870:         PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " 0.0 0.0\n"));
871:         PetscCall(VecRestoreArrayRead(x_numerical, &num_array));
872:       }
873:     }
874:     PetscCall(VecDestroy(&x_numerical));
875:   } else {
876:     /* Data assimilation mode */
877:     for (step = 1; step <= steps; step++) {
878:       PetscReal time = step * dt;

880:       /* Propagate ensemble and truth trajectory */
881:       PetscCall(PetscDAEnsembleForecast(da, ShallowWaterStep2D, sw_ctx));
882:       PetscCall(ShallowWaterStep2D(truth_state, truth_state, sw_ctx));

884:       /* Forecast step: compute ensemble mean and forecast RMSE */
885:       PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
886:       PetscCall(VecCopy(x_mean, x_forecast));
887:       PetscCall(ComputeRMSE(x_forecast, truth_state, rmse_work, nx * ny * ndof, &rmse_forecast));
888:       rmse_analysis = rmse_forecast;

890:       /* Analysis step: assimilate observations when available */
891:       if (step % obs_freq == 0 && step > 0) {
892:         Vec truth_obs, temp_truth;
893:         PetscCall(MatCreateVecs(H, NULL, &truth_obs));
894:         PetscCall(MatCreateVecs(H, &temp_truth, NULL));

896:         /* Generate observations from truth */
897:         PetscCall(VecCopy(truth_state, temp_truth));
898:         PetscCall(MatMult(H, temp_truth, truth_obs));

900:         /* Add observation noise */
901:         PetscCall(VecSetRandomGaussian(obs_noise, rng, 0.0, obs_error_std));
902:         PetscCall(VecWAXPY(observation, 1.0, obs_noise, truth_obs));

904:         /* Perform LETKF analysis */
905:         PetscCall(PetscDAEnsembleAnalysis(da, observation, H));

907:         /* Clean up */
908:         PetscCall(VecDestroy(&temp_truth));
909:         PetscCall(VecDestroy(&truth_obs));

911:         /* Compute analysis RMSE */
912:         PetscCall(PetscDAEnsembleComputeMean(da, x_mean));
913:         PetscCall(ComputeRMSE(x_mean, truth_state, rmse_work, nx * ny * ndof, &rmse_analysis));
914:         obs_count++;
915:       }

917:       /* Compute verification errors if enabled */
918:       if (enable_verification && (step % verification_freq == 0)) {
919:         PetscReal L1_err, L2_err, Linf_err;
920:         PetscCall(ComputeAnalyticError(x_mean, da_state, time, Lx, Ly, g, h0, Ax, Ay, &L1_err, &L2_err, &Linf_err));
921:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "                        Verification: L1=%.5e  L2=%.5e  Linf=%.5e\n", (double)L1_err, (double)L2_err, (double)Linf_err));
922:       }

924:       /* Accumulate statistics */
925:       sum_rmse_forecast += rmse_forecast;
926:       sum_rmse_analysis += rmse_analysis;
927:       n_stat_steps++;

929:       /* Write data to output file */
930:       if (output_enabled && fp) {
931:         const PetscScalar *truth_array, *mean_array, *obs_array;
932:         PetscInt           i;
933:         PetscCall(VecGetArrayRead(truth_state, &truth_array));
934:         PetscCall(VecGetArrayRead(x_mean, &mean_array));
935:         PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, "%d %.6f", (int)step, (double)time));
936:         for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(truth_array[i])));
937:         for (i = 0; i < nx * ny * ndof; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(mean_array[i])));
938:         if (step % obs_freq == 0 && step > 0) {
939:           PetscCall(VecGetArrayRead(observation, &obs_array));
940:           for (i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e", (double)PetscRealPart(obs_array[i])));
941:           PetscCall(VecRestoreArrayRead(observation, &obs_array));
942:         } else {
943:           for (i = 0; i < nobs; i++) PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " nan"));
944:         }
945:         PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fp, " %.8e %.8e\n", (double)rmse_forecast, (double)rmse_analysis));
946:         PetscCall(VecRestoreArrayRead(truth_state, &truth_array));
947:         PetscCall(VecRestoreArrayRead(x_mean, &mean_array));
948:       }

950:       /* Progress reporting */
951:       if (progress_freq == 0) {
952:         if (step == steps) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4" PetscInt_FMT ", time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f\n", step, (double)time, (double)rmse_forecast, (double)rmse_analysis));
953:       } else {
954:         if ((step % progress_freq == 0) || (step == steps)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step %4" PetscInt_FMT ", time %6.3f  RMSE_forecast %.5f  RMSE_analysis %.5f\n", step, (double)time, (double)rmse_forecast, (double)rmse_analysis));
955:       }
956:     }
957:   }

959:   /* Report final statistics */
960:   if (verification_mode) {
961:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nVerification test complete.\n"));
962:   } else {
963:     if (n_stat_steps > 0) {
964:       PetscReal avg_rmse_forecast = sum_rmse_forecast / n_stat_steps;
965:       PetscReal avg_rmse_analysis = sum_rmse_analysis / n_stat_steps;
966:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nStatistics (%" PetscInt_FMT " steps):\n", n_stat_steps));
967:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "==================================================\n"));
968:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Mean RMSE (forecast) : %.5f\n", (double)avg_rmse_forecast));
969:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Mean RMSE (analysis) : %.5f\n", (double)avg_rmse_analysis));
970:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "  Observations used    : %" PetscInt_FMT "\n\n", obs_count));
971:     }
972:   }

974:   /* Close output file */
975:   if (output_enabled && fp) {
976:     PetscCall(PetscFClose(PETSC_COMM_WORLD, fp));
977:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Output written to: %s\n", output_file));
978:   }

980:   /* Cleanup */
981:   if (!verification_mode) {
982:     PetscCall(MatDestroy(&H));
983:     PetscCall(MatDestroy(&H1));
984:     PetscCall(VecDestroy(&x_forecast));
985:     PetscCall(VecDestroy(&x_mean));
986:     PetscCall(VecDestroy(&obs_error_var));
987:     PetscCall(VecDestroy(&obs_noise));
988:     PetscCall(VecDestroy(&observation));
989:     PetscCall(PetscDADestroy(&da));
990:   }
991:   /* These are created in both modes */
992:   PetscCall(VecDestroy(&rmse_work));
993:   PetscCall(VecDestroy(&truth_state));
994:   PetscCall(VecDestroy(&x0));
995:   PetscCall(DMDestroy(&da_state));
996:   PetscCall(ShallowWater2DContextDestroy(&sw_ctx));
997:   PetscCall(PetscRandomDestroy(&rng));

999:   PetscCall(PetscFinalize());
1000:   return 0;
1001: }

1003: /*TEST

1005:   testset:
1006:     requires: kokkos_kernels !complex
1007:     args: -steps 10 -progress_freq 1 -petscda_view -petscda_ensemble_size 10 -obs_freq 2 -obs_error 0.03 -nx 21 -ny 21

1009:     test:
1010:       suffix: letkf_wave2d
1011:       args: -petscda_type letkf -petscda_ensemble_size 7

1013:     test:
1014:       nsize: 3
1015:       suffix: kokkos_wave2d
1016:       args: -petscda_type letkf -mat_type aijkokkos -vec_type kokkos -petscda_ensemble_size 5 -petscda_letkf_obs_per_vertex 5

1018:   test:
1019:     suffix: verification
1020:     requires: !complex kokkos_kernels
1021:     args: -verification_mode -steps 50 -nx 40 -ny 40 -verification_freq 10

1023:   test:
1024:     suffix: verification_parallel
1025:     requires: !complex kokkos_kernels
1026:     nsize: 2
1027:     args: -verification_mode -steps 20 -nx 30 -ny 30 -verification_freq 5

1029: TEST*/