Actual source code: ex14.c

  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: */

 46: #include <petscts.h>
 47: #include <petscdm.h>
 48: #include <petscdmda.h>
 49: #include <petscdmcomposite.h>
 50: #include <ctype.h> /* toupper() */
 51: #include <petsc/private/petscimpl.h>

 53: #if defined __SSE2__
 54:   #include <emmintrin.h>
 55: #endif

 57: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 58: #define USE_SSE2_KERNELS (!defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && defined __SSE2__)

 60: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
 61:   #if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
 62:     #define restrict
 63:   #else
 64:     #define restrict PETSC_RESTRICT
 65:   #endif
 66: #endif

 68: static PetscClassId THI_CLASSID;

 70: typedef enum {
 71:   QUAD_GAUSS,
 72:   QUAD_LOBATTO
 73: } QuadratureType;
 74: static const char     *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
 75: static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
 76: static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
 77: #define G 0.57735026918962573
 78: #define H (0.5 * (1. + G))
 79: #define L (0.5 * (1. - G))
 80: #define M (-0.5)
 81: #define P (0.5)
 82: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
 83: static const PetscReal HexQInterp_Lobatto[8][8] = {
 84:   {H, 0, 0, 0, L, 0, 0, 0},
 85:   {0, H, 0, 0, 0, L, 0, 0},
 86:   {0, 0, H, 0, 0, 0, L, 0},
 87:   {0, 0, 0, H, 0, 0, 0, L},
 88:   {L, 0, 0, 0, H, 0, 0, 0},
 89:   {0, L, 0, 0, 0, H, 0, 0},
 90:   {0, 0, L, 0, 0, 0, H, 0},
 91:   {0, 0, 0, L, 0, 0, 0, H}
 92: };
 93: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
 94:   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
 95:   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
 96:   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
 97:   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
 98:   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
 99:   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
100:   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
101:   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
102: };
103: /* Stanndard Gauss */
104: static const PetscReal HexQInterp_Gauss[8][8] = {
105:   {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
106:   {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
107:   {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
108:   {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
109:   {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
110:   {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
111:   {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
112:   {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
113: };
114: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
115:   {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
116:   {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
117:   {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
118:   {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
119:   {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
120:   {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
121:   {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
122:   {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
123: };
124: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
125: /* Standard 2x2 Gauss quadrature for the bottom layer. */
126: static const PetscReal QuadQInterp[4][4] = {
127:   {H * H, L *H, L *L, H *L},
128:   {L * H, H *H, H *L, L *L},
129:   {L * L, H *L, H *H, L *H},
130:   {H * L, L *L, L *H, H *H}
131: };
132: static const PetscReal QuadQDeriv[4][4][2] = {
133:   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
134:   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
135:   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
136:   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
137: };
138: #undef G
139: #undef H
140: #undef L
141: #undef M
142: #undef P

144: #define HexExtract(x, i, j, k, n) \
145:   do { \
146:     (n)[0] = (x)[i][j][k]; \
147:     (n)[1] = (x)[i + 1][j][k]; \
148:     (n)[2] = (x)[i + 1][j + 1][k]; \
149:     (n)[3] = (x)[i][j + 1][k]; \
150:     (n)[4] = (x)[i][j][k + 1]; \
151:     (n)[5] = (x)[i + 1][j][k + 1]; \
152:     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
153:     (n)[7] = (x)[i][j + 1][k + 1]; \
154:   } while (0)

156: #define HexExtractRef(x, i, j, k, n) \
157:   do { \
158:     (n)[0] = &(x)[i][j][k]; \
159:     (n)[1] = &(x)[i + 1][j][k]; \
160:     (n)[2] = &(x)[i + 1][j + 1][k]; \
161:     (n)[3] = &(x)[i][j + 1][k]; \
162:     (n)[4] = &(x)[i][j][k + 1]; \
163:     (n)[5] = &(x)[i + 1][j][k + 1]; \
164:     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
165:     (n)[7] = &(x)[i][j + 1][k + 1]; \
166:   } while (0)

168: #define QuadExtract(x, i, j, n) \
169:   do { \
170:     (n)[0] = (x)[i][j]; \
171:     (n)[1] = (x)[i + 1][j]; \
172:     (n)[2] = (x)[i + 1][j + 1]; \
173:     (n)[3] = (x)[i][j + 1]; \
174:   } while (0)

176: static PetscScalar Sqr(PetscScalar a)
177: {
178:   return a * a;
179: }

181: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
182: {
183:   PetscInt i;
184:   dz[0] = dz[1] = dz[2] = 0;
185:   for (i = 0; i < 8; i++) {
186:     dz[0] += dphi[i][0] * zn[i];
187:     dz[1] += dphi[i][1] * zn[i];
188:     dz[2] += dphi[i][2] * zn[i];
189:   }
190: }

192: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[restrict], PetscReal phi[restrict], PetscReal dphi[restrict][3], PetscReal *restrict jw)
193: {
194:   const PetscReal jac[3][3] =
195:     {
196:       {hx / 2, 0,      0    },
197:       {0,      hy / 2, 0    },
198:       {dz[0],  dz[1],  dz[2]}
199:   },
200:                   ijac[3][3] = {{1 / jac[0][0], 0, 0}, {0, 1 / jac[1][1], 0}, {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}}, jdet = jac[0][0] * jac[1][1] * jac[2][2];
201:   PetscInt i;

203:   for (i = 0; i < 8; i++) {
204:     const PetscReal *dphir = HexQDeriv[q][i];
205:     phi[i]                 = HexQInterp[q][i];
206:     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
207:     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
208:     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
209:   }
210:   *jw = 1.0 * jdet;
211: }

213: typedef struct _p_THI   *THI;
214: typedef struct _n_Units *Units;

216: typedef struct {
217:   PetscScalar u, v;
218: } Node;

220: typedef struct {
221:   PetscScalar b;     /* bed */
222:   PetscScalar h;     /* thickness */
223:   PetscScalar beta2; /* friction */
224: } PrmNode;

226: #define FieldSize(ntype)             ((PetscInt)(sizeof(ntype) / sizeof(PetscScalar)))
227: #define FieldOffset(ntype, member)   ((PetscInt)(offsetof(ntype, member) / sizeof(PetscScalar)))
228: #define FieldIndex(ntype, i, member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype, member)))
229: #define NODE_SIZE                    FieldSize(Node)
230: #define PRMNODE_SIZE                 FieldSize(PrmNode)

232: typedef struct {
233:   PetscReal min, max, cmin, cmax;
234: } PRange;

236: struct _p_THI {
237:   PETSCHEADER(int);
238:   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
239:   PetscInt  nlevels;
240:   PetscInt  zlevels;
241:   PetscReal Lx, Ly, Lz; /* Model domain */
242:   PetscReal alpha;      /* Bed angle */
243:   Units     units;
244:   PetscReal dirichlet_scale;
245:   PetscReal ssa_friction_scale;
246:   PetscReal inertia;
247:   PRange    eta;
248:   PRange    beta2;
249:   struct {
250:     PetscReal Bd2, eps, exponent, glen_n;
251:   } viscosity;
252:   struct {
253:     PetscReal irefgam, eps2, exponent;
254:   } friction;
255:   struct {
256:     PetscReal rate, exponent, refvel;
257:   } erosion;
258:   PetscReal rhog;
259:   PetscBool no_slip;
260:   PetscBool verbose;
261:   char     *mattype;
262:   char     *monitor_basename;
263:   PetscInt  monitor_interval;
264: };

266: struct _n_Units {
267:   /* fundamental */
268:   PetscReal meter;
269:   PetscReal kilogram;
270:   PetscReal second;
271:   /* derived */
272:   PetscReal Pascal;
273:   PetscReal year;
274: };

276: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
277: {
278:   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
279:                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
280:   PetscInt          i;
281:   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
282: }

284: /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
285: static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2], PetscReal hx, PetscReal hy, const PrmNode pn[4], PrmNode dp[4][2])
286: {
287:   PetscInt q, i, f;
288:   const PetscScalar(*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
289:   PetscScalar(*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;

292:   PetscArrayzero(dpg, 4);
293:   for (q = 0; q < 4; q++) {
294:     for (i = 0; i < 4; i++) {
295:       for (f = 0; f < PRMNODE_SIZE; f++) {
296:         dpg[q][0][f] += dphi[q][i][0] / hx * pg[i][f];
297:         dpg[q][1][f] += dphi[q][i][1] / hy * pg[i][f];
298:       }
299:     }
300:   }
301:   return 0;
302: }

304: static inline PetscReal StaggeredMidpoint2D(PetscScalar a, PetscScalar b, PetscScalar c, PetscScalar d)
305: {
306:   return 0.5 * PetscRealPart(0.75 * a + 0.75 * b + 0.25 * c + 0.25 * d);
307: }
308: static inline PetscReal UpwindFlux1D(PetscReal u, PetscReal hL, PetscReal hR)
309: {
310:   return (u > 0) ? hL * u : hR * u;
311: }

313: #define UpwindFluxXW(x3, x2, h, i, j, k, dj) \
314:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i - 1][j][k].u, x3[i - 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i - 1][j].h + 0.25 * x2[i - 1][j + dj].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h))
315: #define UpwindFluxXE(x3, x2, h, i, j, k, dj) \
316:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i + 1][j][k].u, x3[i + 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h), PetscRealPart(0.75 * x2[i + 1][j].h + 0.25 * x2[i + 1][j + dj].h))
317: #define UpwindFluxYS(x3, x2, h, i, j, k, di) \
318:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j - 1][k].v, x3[i + di][j - 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j - 1].h + 0.25 * x2[i + di][j - 1].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h))
319: #define UpwindFluxYN(x3, x2, h, i, j, k, di) \
320:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j + 1][k].v, x3[i + di][j + 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h), PetscRealPart(0.75 * x2[i][j + 1].h + 0.25 * x2[i + di][j + 1].h))

322: static void PrmNodeGetFaceMeasure(const PrmNode **p, PetscInt i, PetscInt j, PetscScalar h[])
323: {
324:   /* West */
325:   h[0] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j - 1].h, p[i][j - 1].h);
326:   h[1] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j + 1].h, p[i][j + 1].h);
327:   /* East */
328:   h[2] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j + 1].h, p[i][j + 1].h);
329:   h[3] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j - 1].h, p[i][j - 1].h);
330:   /* South */
331:   h[4] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i + 1][j - 1].h, p[i + 1][j].h);
332:   h[5] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i - 1][j - 1].h, p[i - 1][j].h);
333:   /* North */
334:   h[6] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i - 1][j + 1].h, p[i - 1][j].h);
335:   h[7] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i + 1][j + 1].h, p[i + 1][j].h);
336: }

338: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
339: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
340: {
341:   Units     units = thi->units;
342:   PetscReal s     = -x * PetscSinReal(thi->alpha);
343:   p->b            = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
344:   p->h            = s - p->b;
345:   p->beta2        = -1e-10; /* This value is not used, but it should not be huge because that would change the finite difference step size  */
346: }

348: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
349: {
350:   Units     units = thi->units;
351:   PetscReal s     = -x * PetscSinReal(thi->alpha);
352:   p->b            = s - 1000 * units->meter;
353:   p->h            = s - p->b;
354:   /* tau_b = beta2 v   is a stress (Pa).
355:    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
356:   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
357: }

359: /* These are just toys */

361: /* From Fred Herman */
362: static void THIInitialize_HOM_F(THI thi, PetscReal x, PetscReal y, PrmNode *p)
363: {
364:   Units     units = thi->units;
365:   PetscReal s     = -x * PetscSinReal(thi->alpha);
366:   p->b            = s - 1000 * units->meter + 100 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx); /* * sin(y*2*PETSC_PI/thi->Ly); */
367:   p->h            = s - p->b;
368:   p->h            = (1 - (atan((x - thi->Lx / 2) / 1.) + PETSC_PI / 2.) / PETSC_PI) * 500 * units->meter + 1 * units->meter;
369:   s               = PetscRealPart(p->b + p->h);
370:   p->beta2        = -1e-10;
371:   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
372: }

374: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
375: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
376: {
377:   Units     units = thi->units;
378:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
379:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
380:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
381:   p->h     = s - p->b;
382:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
383: }

385: /* Like Z, but with 200 meter cliffs */
386: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
387: {
388:   Units     units = thi->units;
389:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
390:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
391:   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
392:   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
393:   p->h     = s - p->b;
394:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
395: }

397: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
398: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
399: {
400:   Units     units = thi->units;
401:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
402:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
403:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
404:   p->h     = s - p->b;
405:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
406: }

408: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
409: {
410:   if (thi->friction.irefgam == 0) {
411:     Units units           = thi->units;
412:     thi->friction.irefgam = 1. / (0.5 * PetscSqr(100 * units->meter / units->year));
413:     thi->friction.eps2    = 0.5 * PetscSqr(1.e-4 / thi->friction.irefgam);
414:   }
415:   if (thi->friction.exponent == 0) {
416:     *beta2  = rbeta2;
417:     *dbeta2 = 0;
418:   } else {
419:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
420:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
421:   }
422: }

424: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
425: {
426:   PetscReal Bd2, eps, exponent;
427:   if (thi->viscosity.Bd2 == 0) {
428:     Units           units   = thi->units;
429:     const PetscReal n       = thi->viscosity.glen_n,                                  /* Glen exponent */
430:       p                     = 1. + 1. / n,                                            /* for Stokes */
431:       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
432:       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
433:     thi->viscosity.Bd2      = B / 2;
434:     thi->viscosity.exponent = (p - 2) / 2;
435:     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
436:   }
437:   Bd2      = thi->viscosity.Bd2;
438:   exponent = thi->viscosity.exponent;
439:   eps      = thi->viscosity.eps;
440:   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
441:   *deta    = exponent * (*eta) / (eps + gam);
442: }

444: static void THIErosion(THI thi, const Node *vel, PetscScalar *erate, Node *derate)
445: {
446:   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel), rate = -thi->erosion.rate * PetscPowScalar(magref2, 0.5 * thi->erosion.exponent);
447:   if (erate) *erate = rate;
448:   if (derate) {
449:     if (thi->erosion.exponent == 1) {
450:       derate->u = 0;
451:       derate->v = 0;
452:     } else {
453:       derate->u = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
454:       derate->v = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
455:     }
456:   }
457: }

459: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
460: {
461:   if (x < *min) *min = x;
462:   if (x > *max) *max = x;
463: }

465: static void PRangeClear(PRange *p)
466: {
467:   p->cmin = p->min = 1e100;
468:   p->cmax = p->max = -1e100;
469: }

471: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
472: {
474:   p->cmin = min;
475:   p->cmax = max;
476:   if (min < p->min) p->min = min;
477:   if (max > p->max) p->max = max;
478:   return 0;
479: }

481: static PetscErrorCode THIDestroy(THI *thi)
482: {
484:   if (--((PetscObject)(*thi))->refct > 0) return 0;
485:   PetscFree((*thi)->units);
486:   PetscFree((*thi)->mattype);
487:   PetscFree((*thi)->monitor_basename);
488:   PetscHeaderDestroy(thi);
489:   return 0;
490: }

492: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
493: {
494:   static PetscBool registered = PETSC_FALSE;
495:   THI              thi;
496:   Units            units;
497:   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
498:   PetscErrorCode   ierr;

501:   *inthi = 0;
502:   if (!registered) {
503:     PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID);
504:     registered = PETSC_TRUE;
505:   }
506:   PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "THI", comm, THIDestroy, 0);

508:   PetscNew(&thi->units);

510:   units           = thi->units;
511:   units->meter    = 1e-2;
512:   units->second   = 1e-7;
513:   units->kilogram = 1e-12;

515:   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
516:   {
517:     PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL);
518:     PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL);
519:     PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL);
520:   }
521:   PetscOptionsEnd();
522:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
523:   units->year   = 31556926. * units->second, /* seconds per year */

525:     thi->Lx            = 10.e3;
526:   thi->Ly              = 10.e3;
527:   thi->Lz              = 1000;
528:   thi->nlevels         = 1;
529:   thi->dirichlet_scale = 1;
530:   thi->verbose         = PETSC_FALSE;

532:   thi->viscosity.glen_n = 3.;
533:   thi->erosion.rate     = 1e-3; /* m/a */
534:   thi->erosion.exponent = 1.;
535:   thi->erosion.refvel   = 1.; /* m/a */

537:   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
538:   {
539:     QuadratureType quad       = QUAD_GAUSS;
540:     char           homexp[]   = "A";
541:     char           mtype[256] = MATSBAIJ;
542:     PetscReal      L, m = 1.0;
543:     PetscBool      flg;
544:     L = thi->Lx;
545:     PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg);
546:     if (flg) thi->Lx = thi->Ly = L;
547:     PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL);
548:     PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL);
549:     PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL);
550:     PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL);
551:     switch (homexp[0] = toupper(homexp[0])) {
552:     case 'A':
553:       thi->initialize = THIInitialize_HOM_A;
554:       thi->no_slip    = PETSC_TRUE;
555:       thi->alpha      = 0.5;
556:       break;
557:     case 'C':
558:       thi->initialize = THIInitialize_HOM_C;
559:       thi->no_slip    = PETSC_FALSE;
560:       thi->alpha      = 0.1;
561:       break;
562:     case 'F':
563:       thi->initialize = THIInitialize_HOM_F;
564:       thi->no_slip    = PETSC_FALSE;
565:       thi->alpha      = 0.5;
566:       break;
567:     case 'X':
568:       thi->initialize = THIInitialize_HOM_X;
569:       thi->no_slip    = PETSC_FALSE;
570:       thi->alpha      = 0.3;
571:       break;
572:     case 'Y':
573:       thi->initialize = THIInitialize_HOM_Y;
574:       thi->no_slip    = PETSC_FALSE;
575:       thi->alpha      = 0.5;
576:       break;
577:     case 'Z':
578:       thi->initialize = THIInitialize_HOM_Z;
579:       thi->no_slip    = PETSC_FALSE;
580:       thi->alpha      = 0.5;
581:       break;
582:     default:
583:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
584:     }
585:     PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL);
586:     switch (quad) {
587:     case QUAD_GAUSS:
588:       HexQInterp = HexQInterp_Gauss;
589:       HexQDeriv  = HexQDeriv_Gauss;
590:       break;
591:     case QUAD_LOBATTO:
592:       HexQInterp = HexQInterp_Lobatto;
593:       HexQDeriv  = HexQDeriv_Lobatto;
594:       break;
595:     }
596:     PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL);
597:     PetscOptionsReal("-thi_viscosity_glen_n", "Exponent in Glen flow law, 1=linear, infty=ideal plastic", NULL, thi->viscosity.glen_n, &thi->viscosity.glen_n, NULL);
598:     PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL);
599:     thi->friction.exponent = (m - 1) / 2;
600:     PetscOptionsReal("-thi_erosion_rate", "Rate of erosion relative to sliding velocity at reference velocity (m/a)", NULL, thi->erosion.rate, &thi->erosion.rate, NULL);
601:     PetscOptionsReal("-thi_erosion_exponent", "Power of sliding velocity appearing in erosion relation", NULL, thi->erosion.exponent, &thi->erosion.exponent, NULL);
602:     PetscOptionsReal("-thi_erosion_refvel", "Reference sliding velocity for erosion (m/a)", NULL, thi->erosion.refvel, &thi->erosion.refvel, NULL);
603:     thi->erosion.rate *= units->meter / units->year;
604:     thi->erosion.refvel *= units->meter / units->year;
605:     PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL);
606:     PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL);
607:     PetscOptionsReal("-thi_inertia", "Coefficient of accelaration term in velocity system, physical is almost zero", NULL, thi->inertia, &thi->inertia, NULL);
608:     PetscOptionsInt("-thi_nlevels", "Number of levels of refinement", "", thi->nlevels, &thi->nlevels, NULL);
609:     PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL);
610:     PetscStrallocpy(mtype, &thi->mattype);
611:     PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL);
612:     PetscOptionsString("-thi_monitor", "Basename to write state files to", NULL, monitor_basename, monitor_basename, sizeof(monitor_basename), &flg);
613:     if (flg) {
614:       PetscStrallocpy(monitor_basename, &thi->monitor_basename);
615:       thi->monitor_interval = 1;
616:       PetscOptionsInt("-thi_monitor_interval", "Frequency at which to write state files", NULL, thi->monitor_interval, &thi->monitor_interval, NULL);
617:     }
618:   }
619:   PetscOptionsEnd();

621:   /* dimensionalize */
622:   thi->Lx *= units->meter;
623:   thi->Ly *= units->meter;
624:   thi->Lz *= units->meter;
625:   thi->alpha *= PETSC_PI / 180;

627:   PRangeClear(&thi->eta);
628:   PRangeClear(&thi->beta2);

630:   {
631:     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowRealInt(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
632:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
633:     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
634:     thi->rhog = rho * grav;
635:     if (thi->verbose) {
636:       PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal);
637:       PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving);
638:       PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu, 2 * eta * gradu / driving));
639:       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
640:       PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu, 2 * eta * 1e-3 * gradu / driving));
641:     }
642:   }

644:   *inthi = thi;
645:   return 0;
646: }

648: /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
649:  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
650:  * the horizontal. */
651: static PetscErrorCode THIFixGhosts(THI thi, DM da3, DM da2, Vec X3, Vec X2)
652: {
653:   DMDALocalInfo info;
654:   PrmNode     **x2;
655:   PetscInt      i, j;

658:   DMDAGetLocalInfo(da3, &info);
659:   /* VecView(X2,PETSC_VIEWER_STDOUT_WORLD); */
660:   DMDAVecGetArray(da2, X2, &x2);
661:   for (i = info.gzs; i < info.gzs + info.gzm; i++) {
662:     if (i > -1 && i < info.mz) continue;
663:     for (j = info.gys; j < info.gys + info.gym; j++) x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i < 0 ? 1.0 : -1.0);
664:   }
665:   DMDAVecRestoreArray(da2, X2, &x2);
666:   /* VecView(X2,PETSC_VIEWER_STDOUT_WORLD); */
667:   return 0;
668: }

670: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, PrmNode **p)
671: {
672:   PetscInt i, j, xs, xm, ys, ym, mx, my;

675:   DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0);
676:   DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
677:   for (i = xs; i < xs + xm; i++) {
678:     for (j = ys; j < ys + ym; j++) {
679:       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
680:       thi->initialize(thi, xx, yy, &p[i][j]);
681:     }
682:   }
683:   return 0;
684: }

686: static PetscErrorCode THIInitial(THI thi, DM pack, Vec X)
687: {
688:   DM        da3, da2;
689:   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
690:   PetscReal hx, hy;
691:   PrmNode **prm;
692:   Node   ***x;
693:   Vec       X3g, X2g, X2;

696:   DMCompositeGetEntries(pack, &da3, &da2);
697:   DMCompositeGetAccess(pack, X, &X3g, &X2g);
698:   DMGetLocalVector(da2, &X2);

700:   DMDAGetInfo(da3, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0);
701:   DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm);
702:   DMDAVecGetArray(da3, X3g, &x);
703:   DMDAVecGetArray(da2, X2, &prm);

705:   THIInitializePrm(thi, da2, prm);

707:   hx = thi->Lx / mx;
708:   hy = thi->Ly / my;
709:   for (i = xs; i < xs + xm; i++) {
710:     for (j = ys; j < ys + ym; j++) {
711:       for (k = zs; k < zs + zm; k++) {
712:         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
713:         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
714:         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
715:       }
716:     }
717:   }

719:   DMDAVecRestoreArray(da3, X3g, &x);
720:   DMDAVecRestoreArray(da2, X2, &prm);

722:   DMLocalToGlobalBegin(da2, X2, INSERT_VALUES, X2g);
723:   DMLocalToGlobalEnd(da2, X2, INSERT_VALUES, X2g);
724:   DMRestoreLocalVector(da2, &X2);

726:   DMCompositeRestoreAccess(pack, X, &X3g, &X2g);
727:   return 0;
728: }

730: static void PointwiseNonlinearity(THI thi, const Node n[restrict 8], const PetscReal phi[restrict 3], PetscReal dphi[restrict 8][3], PetscScalar *restrict u, PetscScalar *restrict v, PetscScalar du[restrict 3], PetscScalar dv[restrict 3], PetscReal *eta, PetscReal *deta)
731: {
732:   PetscInt    l, ll;
733:   PetscScalar gam;

735:   du[0] = du[1] = du[2] = 0;
736:   dv[0] = dv[1] = dv[2] = 0;
737:   *u                    = 0;
738:   *v                    = 0;
739:   for (l = 0; l < 8; l++) {
740:     *u += phi[l] * n[l].u;
741:     *v += phi[l] * n[l].v;
742:     for (ll = 0; ll < 3; ll++) {
743:       du[ll] += dphi[l][ll] * n[l].u;
744:       dv[ll] += dphi[l][ll] * n[l].v;
745:     }
746:   }
747:   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0] * dv[1] + 0.25 * Sqr(du[1] + dv[0]) + 0.25 * Sqr(du[2]) + 0.25 * Sqr(dv[2]);
748:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
749: }

751: static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const Node ***xdot, Node ***f, THI thi)
752: {
753:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
754:   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;

757:   xs = info->zs;
758:   ys = info->ys;
759:   xm = info->zm;
760:   ym = info->ym;
761:   zm = info->xm;
762:   hx = thi->Lx / info->mz;
763:   hy = thi->Ly / info->my;

765:   etamin   = 1e100;
766:   etamax   = 0;
767:   beta2min = 1e100;
768:   beta2max = 0;

770:   for (i = xs; i < xs + xm; i++) {
771:     for (j = ys; j < ys + ym; j++) {
772:       PrmNode pn[4], dpn[4][2];
773:       QuadExtract(prm, i, j, pn);
774:       QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn);
775:       for (k = 0; k < zm - 1; k++) {
776:         PetscInt  ls = 0;
777:         Node      n[8], ndot[8], *fn[8];
778:         PetscReal zn[8], etabase = 0;

780:         PrmHexGetZ(pn, k, zm, zn);
781:         HexExtract(x, i, j, k, n);
782:         HexExtract(xdot, i, j, k, ndot);
783:         HexExtractRef(f, i, j, k, fn);
784:         if (thi->no_slip && k == 0) {
785:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
786:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
787:           ls = 4;
788:         }
789:         for (q = 0; q < 8; q++) {
790:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
791:           PetscScalar du[3], dv[3], u, v, udot = 0, vdot = 0;
792:           for (l = ls; l < 8; l++) {
793:             udot += HexQInterp[q][l] * ndot[l].u;
794:             vdot += HexQInterp[q][l] * ndot[l].v;
795:           }
796:           HexGrad(HexQDeriv[q], zn, dz);
797:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
798:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
799:           jw /= thi->rhog; /* scales residuals to be O(1) */
800:           if (q == 0) etabase = eta;
801:           RangeUpdate(&etamin, &etamax, eta);
802:           for (l = ls; l < 8; l++) { /* test functions */
803:             const PetscScalar ds[2] = {dpn[q % 4][0].h + dpn[q % 4][0].b, dpn[q % 4][1].h + dpn[q % 4][1].b};
804:             const PetscReal   pp = phi[l], *dp = dphi[l];
805:             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
806:             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
807:             fn[l]->u += pp * jw * udot * thi->inertia * pp;
808:             fn[l]->v += pp * jw * vdot * thi->inertia * pp;
809:           }
810:         }
811:         if (k == 0) { /* we are on a bottom face */
812:           if (thi->no_slip) {
813:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
814:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
815:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
816:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
817:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
818:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
819:             * assembled matrix (see the similar block in THIJacobianLocal).
820:             *
821:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
822:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
823:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
824:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
825:             */
826:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
827:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
828:             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
829:             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
830:           } else {                    /* Integrate over bottom face to apply boundary condition */
831:             for (q = 0; q < 4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
832:               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
833:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
834:               PetscReal       beta2, dbeta2;
835:               for (l = 0; l < 4; l++) {
836:                 u += phi[l] * n[l].u;
837:                 v += phi[l] * n[l].v;
838:                 rbeta2 += phi[l] * pn[l].beta2;
839:               }
840:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
841:               RangeUpdate(&beta2min, &beta2max, beta2);
842:               for (l = 0; l < 4; l++) {
843:                 const PetscReal pp = phi[l];
844:                 fn[ls + l]->u += pp * jw * beta2 * u;
845:                 fn[ls + l]->v += pp * jw * beta2 * v;
846:               }
847:             }
848:           }
849:         }
850:       }
851:     }
852:   }

854:   PRangeMinMax(&thi->eta, etamin, etamax);
855:   PRangeMinMax(&thi->beta2, beta2min, beta2max);
856:   return 0;
857: }

859: static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const PrmNode **prmdot, PrmNode **f, THI thi)
860: {
861:   PetscInt xs, ys, xm, ym, zm, i, j, k;

864:   xs = info->zs;
865:   ys = info->ys;
866:   xm = info->zm;
867:   ym = info->ym;
868:   zm = info->xm;

870:   for (i = xs; i < xs + xm; i++) {
871:     for (j = ys; j < ys + ym; j++) {
872:       PetscScalar div = 0, erate, h[8];
873:       PrmNodeGetFaceMeasure(prm, i, j, h);
874:       for (k = 0; k < zm; k++) {
875:         PetscScalar weight = (k == 0 || k == zm - 1) ? 0.5 / (zm - 1) : 1.0 / (zm - 1);
876:         if (0) { /* centered flux */
877:           div += (-weight * h[0] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j - 1][k].u, x[i][j - 1][k].u) - weight * h[1] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j + 1][k].u, x[i][j + 1][k].u) +
878:                   weight * h[2] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j + 1][k].u, x[i][j + 1][k].u) + weight * h[3] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j - 1][k].u, x[i][j - 1][k].u) -
879:                   weight * h[4] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i + 1][j - 1][k].v, x[i + 1][j][k].v) - weight * h[5] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i - 1][j - 1][k].v, x[i - 1][j][k].v) +
880:                   weight * h[6] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i - 1][j + 1][k].v, x[i - 1][j][k].v) + weight * h[7] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i + 1][j + 1][k].v, x[i + 1][j][k].v));
881:         } else { /* Upwind flux */
882:           div += weight * (-UpwindFluxXW(x, prm, h, i, j, k, 1) - UpwindFluxXW(x, prm, h, i, j, k, -1) + UpwindFluxXE(x, prm, h, i, j, k, 1) + UpwindFluxXE(x, prm, h, i, j, k, -1) - UpwindFluxYS(x, prm, h, i, j, k, 1) - UpwindFluxYS(x, prm, h, i, j, k, -1) + UpwindFluxYN(x, prm, h, i, j, k, 1) + UpwindFluxYN(x, prm, h, i, j, k, -1));
883:         }
884:       }
885:       /* printf("div[%d][%d] %g\n",i,j,div); */
886:       THIErosion(thi, &x[i][j][0], &erate, NULL);
887:       f[i][j].b     = prmdot[i][j].b - erate;
888:       f[i][j].h     = prmdot[i][j].h + div;
889:       f[i][j].beta2 = prmdot[i][j].beta2;
890:     }
891:   }
892:   return 0;
893: }

895: static PetscErrorCode THIFunction(TS ts, PetscReal t, Vec X, Vec Xdot, Vec F, void *ctx)
896: {
897:   THI             thi = (THI)ctx;
898:   DM              pack, da3, da2;
899:   Vec             X3, X2, Xdot3, Xdot2, F3, F2, F3g, F2g;
900:   const Node   ***x3, ***xdot3;
901:   const PrmNode **x2, **xdot2;
902:   Node         ***f3;
903:   PrmNode       **f2;
904:   DMDALocalInfo   info3;

907:   TSGetDM(ts, &pack);
908:   DMCompositeGetEntries(pack, &da3, &da2);
909:   DMDAGetLocalInfo(da3, &info3);
910:   DMCompositeGetLocalVectors(pack, &X3, &X2);
911:   DMCompositeGetLocalVectors(pack, &Xdot3, &Xdot2);
912:   DMCompositeScatter(pack, X, X3, X2);
913:   THIFixGhosts(thi, da3, da2, X3, X2);
914:   DMCompositeScatter(pack, Xdot, Xdot3, Xdot2);

916:   DMGetLocalVector(da3, &F3);
917:   DMGetLocalVector(da2, &F2);
918:   VecZeroEntries(F3);

920:   DMDAVecGetArray(da3, X3, &x3);
921:   DMDAVecGetArray(da2, X2, &x2);
922:   DMDAVecGetArray(da3, Xdot3, &xdot3);
923:   DMDAVecGetArray(da2, Xdot2, &xdot2);
924:   DMDAVecGetArray(da3, F3, &f3);
925:   DMDAVecGetArray(da2, F2, &f2);

927:   THIFunctionLocal_3D(&info3, x3, x2, xdot3, f3, thi);
928:   THIFunctionLocal_2D(&info3, x3, x2, xdot2, f2, thi);

930:   DMDAVecRestoreArray(da3, X3, &x3);
931:   DMDAVecRestoreArray(da2, X2, &x2);
932:   DMDAVecRestoreArray(da3, Xdot3, &xdot3);
933:   DMDAVecRestoreArray(da2, Xdot2, &xdot2);
934:   DMDAVecRestoreArray(da3, F3, &f3);
935:   DMDAVecRestoreArray(da2, F2, &f2);

937:   DMCompositeRestoreLocalVectors(pack, &X3, &X2);
938:   DMCompositeRestoreLocalVectors(pack, &Xdot3, &Xdot2);

940:   VecZeroEntries(F);
941:   DMCompositeGetAccess(pack, F, &F3g, &F2g);
942:   DMLocalToGlobalBegin(da3, F3, ADD_VALUES, F3g);
943:   DMLocalToGlobalEnd(da3, F3, ADD_VALUES, F3g);
944:   DMLocalToGlobalBegin(da2, F2, INSERT_VALUES, F2g);
945:   DMLocalToGlobalEnd(da2, F2, INSERT_VALUES, F2g);

947:   if (thi->verbose) {
948:     PetscViewer viewer;
949:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi), &viewer);
950:     PetscViewerASCIIPrintf(viewer, "3D_Velocity residual (bs=2):\n");
951:     PetscViewerASCIIPushTab(viewer);
952:     VecView(F3, viewer);
953:     PetscViewerASCIIPopTab(viewer);
954:     PetscViewerASCIIPrintf(viewer, "2D_Fields residual (bs=3):\n");
955:     PetscViewerASCIIPushTab(viewer);
956:     VecView(F2, viewer);
957:     PetscViewerASCIIPopTab(viewer);
958:   }

960:   DMCompositeRestoreAccess(pack, F, &F3g, &F2g);

962:   DMRestoreLocalVector(da3, &F3);
963:   DMRestoreLocalVector(da2, &F2);
964:   return 0;
965: }

967: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
968: {
969:   PetscReal   nrm;
970:   PetscInt    m;
971:   PetscMPIInt rank;

974:   MatNorm(B, NORM_FROBENIUS, &nrm);
975:   MatGetSize(B, &m, 0);
976:   MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank);
977:   if (rank == 0) {
978:     PetscScalar val0, val2;
979:     MatGetValue(B, 0, 0, &val0);
980:     MatGetValue(B, 2, 2, &val2);
981:     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), (double)thi->eta.cmin,
982:                                      (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
983:   }
984:   return 0;
985: }

987: static PetscErrorCode THISurfaceStatistics(DM pack, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
988: {
989:   DM          da3, da2;
990:   Vec         X3, X2;
991:   Node     ***x;
992:   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
993:   PetscReal   umin = 1e100, umax = -1e100;
994:   PetscScalar usum = 0.0, gusum;

997:   DMCompositeGetEntries(pack, &da3, &da2);
998:   DMCompositeGetAccess(pack, X, &X3, &X2);
999:   *min = *max = *mean = 0;
1000:   DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1001:   DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm);
1003:   DMDAVecGetArray(da3, X3, &x);
1004:   for (i = xs; i < xs + xm; i++) {
1005:     for (j = ys; j < ys + ym; j++) {
1006:       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
1007:       RangeUpdate(&umin, &umax, u);
1008:       usum += u;
1009:     }
1010:   }
1011:   DMDAVecRestoreArray(da3, X3, &x);
1012:   DMCompositeRestoreAccess(pack, X, &X3, &X2);

1014:   MPI_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da3));
1015:   MPI_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da3));
1016:   MPI_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da3));
1017:   *mean = PetscRealPart(gusum) / (mx * my);
1018:   return 0;
1019: }

1021: static PetscErrorCode THISolveStatistics(THI thi, TS ts, PetscInt coarsened, const char name[])
1022: {
1023:   MPI_Comm comm;
1024:   DM       pack;
1025:   Vec      X, X3, X2;

1028:   PetscObjectGetComm((PetscObject)thi, &comm);
1029:   TSGetDM(ts, &pack);
1030:   TSGetSolution(ts, &X);
1031:   DMCompositeGetAccess(pack, X, &X3, &X2);
1032:   PetscPrintf(comm, "Solution statistics after solve: %s\n", name);
1033:   {
1034:     PetscInt            its, lits;
1035:     SNESConvergedReason reason;
1036:     SNES                snes;
1037:     TSGetSNES(ts, &snes);
1038:     SNESGetIterationNumber(snes, &its);
1039:     SNESGetConvergedReason(snes, &reason);
1040:     SNESGetLinearSolveIterations(snes, &lits);
1041:     PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits);
1042:   }
1043:   {
1044:     PetscReal    nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
1045:     PetscInt     i, j, m;
1046:     PetscScalar *x;
1047:     VecNorm(X3, NORM_2, &nrm2);
1048:     VecGetLocalSize(X3, &m);
1049:     VecGetArray(X3, &x);
1050:     for (i = 0; i < m; i += 2) {
1051:       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
1052:       tmin[0] = PetscMin(u, tmin[0]);
1053:       tmin[1] = PetscMin(v, tmin[1]);
1054:       tmin[2] = PetscMin(c, tmin[2]);
1055:       tmax[0] = PetscMax(u, tmax[0]);
1056:       tmax[1] = PetscMax(v, tmax[1]);
1057:       tmax[2] = PetscMax(c, tmax[2]);
1058:     }
1059:     VecRestoreArray(X, &x);
1060:     MPI_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi));
1061:     MPI_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi));
1062:     /* Dimensionalize to meters/year */
1063:     nrm2 *= thi->units->year / thi->units->meter;
1064:     for (j = 0; j < 3; j++) {
1065:       min[j] *= thi->units->year / thi->units->meter;
1066:       max[j] *= thi->units->year / thi->units->meter;
1067:     }
1068:     PetscPrintf(comm, "|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]);
1069:     {
1070:       PetscReal umin, umax, umean;
1071:       THISurfaceStatistics(pack, X, &umin, &umax, &umean);
1072:       umin *= thi->units->year / thi->units->meter;
1073:       umax *= thi->units->year / thi->units->meter;
1074:       umean *= thi->units->year / thi->units->meter;
1075:       PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean);
1076:     }
1077:     /* These values stay nondimensional */
1078:     PetscPrintf(comm, "Global eta range   [%g, %g], converged range [%g, %g]\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax);
1079:     PetscPrintf(comm, "Global beta2 range [%g, %g], converged range [%g, %g]\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax);
1080:   }
1081:   PetscPrintf(comm, "\n");
1082:   DMCompositeRestoreAccess(pack, X, &X3, &X2);
1083:   return 0;
1084: }

1086: static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info, PetscInt i, PetscInt j, PetscInt k)
1087: {
1088:   return ((i - info->gzs) * info->gym + (j - info->gys)) * info->gxm + (k - info->gxs);
1089: }
1090: static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info, PetscInt i, PetscInt j)
1091: {
1092:   return (i - info->gzs) * info->gym + (j - info->gys);
1093: }

1095: static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, Mat B, Mat Bcpl, THI thi)
1096: {
1097:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1098:   PetscReal hx, hy;

1101:   xs = info->zs;
1102:   ys = info->ys;
1103:   xm = info->zm;
1104:   ym = info->ym;
1105:   zm = info->xm;
1106:   hx = thi->Lx / info->mz;
1107:   hy = thi->Ly / info->my;

1109:   for (i = xs; i < xs + xm; i++) {
1110:     for (j = ys; j < ys + ym; j++) {
1111:       PrmNode pn[4], dpn[4][2];
1112:       QuadExtract(prm, i, j, pn);
1113:       QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn);
1114:       for (k = 0; k < zm - 1; k++) {
1115:         Node        n[8];
1116:         PetscReal   zn[8], etabase = 0;
1117:         PetscScalar Ke[8 * NODE_SIZE][8 * NODE_SIZE], Kcpl[8 * NODE_SIZE][4 * PRMNODE_SIZE];
1118:         PetscInt    ls = 0;

1120:         PrmHexGetZ(pn, k, zm, zn);
1121:         HexExtract(x, i, j, k, n);
1122:         PetscMemzero(Ke, sizeof(Ke));
1123:         PetscMemzero(Kcpl, sizeof(Kcpl));
1124:         if (thi->no_slip && k == 0) {
1125:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1126:           ls = 4;
1127:         }
1128:         for (q = 0; q < 8; q++) {
1129:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1130:           PetscScalar du[3], dv[3], u, v;
1131:           HexGrad(HexQDeriv[q], zn, dz);
1132:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1133:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1134:           jw /= thi->rhog; /* residuals are scaled by this factor */
1135:           if (q == 0) etabase = eta;
1136:           for (l = ls; l < 8; l++) { /* test functions */
1137:             const PetscReal pp = phi[l], *restrict dp = dphi[l];
1138:             for (ll = ls; ll < 8; ll++) { /* trial functions */
1139:               const PetscReal *restrict dpl = dphi[ll];
1140:               PetscScalar dgdu, dgdv;
1141:               dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1142:               dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1143:               /* Picard part */
1144:               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1145:               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1146:               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1147:               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1148:               /* extra Newton terms */
1149:               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1150:               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1151:               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1152:               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1153:               /* inertial part */
1154:               Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * thi->inertia * pp;
1155:               Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * thi->inertia * pp;
1156:             }
1157:             for (ll = 0; ll < 4; ll++) {                                                              /* Trial functions for surface/bed */
1158:               const PetscReal dpl[] = {QuadQDeriv[q % 4][ll][0] / hx, QuadQDeriv[q % 4][ll][1] / hy}; /* surface = h + b */
1159:               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[0];
1160:               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[0];
1161:               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[1];
1162:               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[1];
1163:             }
1164:           }
1165:         }
1166:         if (k == 0) { /* on a bottom face */
1167:           if (thi->no_slip) {
1168:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1169:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1170:             Ke[0][0] = thi->dirichlet_scale * diagu;
1171:             Ke[0][1] = 0;
1172:             Ke[1][0] = 0;
1173:             Ke[1][1] = thi->dirichlet_scale * diagv;
1174:           } else {
1175:             for (q = 0; q < 4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1176:               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
1177:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1178:               PetscReal       beta2, dbeta2;
1179:               for (l = 0; l < 4; l++) {
1180:                 u += phi[l] * n[l].u;
1181:                 v += phi[l] * n[l].v;
1182:                 rbeta2 += phi[l] * pn[l].beta2;
1183:               }
1184:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1185:               for (l = 0; l < 4; l++) {
1186:                 const PetscReal pp = phi[l];
1187:                 for (ll = 0; ll < 4; ll++) {
1188:                   const PetscReal ppl = phi[ll];
1189:                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1190:                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1191:                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1192:                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1193:                 }
1194:               }
1195:             }
1196:           }
1197:         }
1198:         {
1199:           const PetscInt rc3blocked[8]                 = {DMDALocalIndex3D(info, i + 0, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 1, k + 0), DMDALocalIndex3D(info, i + 0, j + 1, k + 0),
1200:                                                           DMDALocalIndex3D(info, i + 0, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 1, k + 1), DMDALocalIndex3D(info, i + 0, j + 1, k + 1)},
1201:                          col2blocked[PRMNODE_SIZE * 4] = {DMDALocalIndex2D(info, i + 0, j + 0), DMDALocalIndex2D(info, i + 1, j + 0), DMDALocalIndex2D(info, i + 1, j + 1), DMDALocalIndex2D(info, i + 0, j + 1)};
1202: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1203:           for (l = 0; l < 8; l++) {
1204:             for (ll = l + 1; ll < 8; ll++) {
1205:               Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1206:               Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1207:               Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1208:               Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1209:             }
1210:           }
1211: #endif
1212:           MatSetValuesBlockedLocal(B, 8, rc3blocked, 8, rc3blocked, &Ke[0][0], ADD_VALUES); /* velocity-velocity coupling can use blocked insertion */
1213:           {                                                                                            /* The off-diagonal part cannot (yet) */
1214:             PetscInt row3scalar[NODE_SIZE * 8], col2scalar[PRMNODE_SIZE * 4];
1215:             for (l = 0; l < 8; l++)
1216:               for (ll = 0; ll < NODE_SIZE; ll++) row3scalar[l * NODE_SIZE + ll] = rc3blocked[l] * NODE_SIZE + ll;
1217:             for (l = 0; l < 4; l++)
1218:               for (ll = 0; ll < PRMNODE_SIZE; ll++) col2scalar[l * PRMNODE_SIZE + ll] = col2blocked[l] * PRMNODE_SIZE + ll;
1219:             MatSetValuesLocal(Bcpl, 8 * NODE_SIZE, row3scalar, 4 * PRMNODE_SIZE, col2scalar, &Kcpl[0][0], ADD_VALUES);
1220:           }
1221:         }
1222:       }
1223:     }
1224:   }
1225:   return 0;
1226: }

1228: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, const Node ***x3, const PrmNode **x2, const PrmNode **xdot2, PetscReal a, Mat B22, Mat B21, THI thi)
1229: {
1230:   PetscInt xs, ys, xm, ym, zm, i, j, k;

1233:   xs = info->zs;
1234:   ys = info->ys;
1235:   xm = info->zm;
1236:   ym = info->ym;
1237:   zm = info->xm;

1240:   for (i = xs; i < xs + xm; i++) {
1241:     for (j = ys; j < ys + ym; j++) {
1242:       { /* Self-coupling */
1243:         const PetscInt    row[]  = {DMDALocalIndex2D(info, i, j)};
1244:         const PetscInt    col[]  = {DMDALocalIndex2D(info, i, j)};
1245:         const PetscScalar vals[] = {a, 0, 0, 0, a, 0, 0, 0, a};
1246:         MatSetValuesBlockedLocal(B22, 1, row, 1, col, vals, INSERT_VALUES);
1247:       }
1248:       for (k = 0; k < zm; k++) { /* Coupling to velocity problem */
1249:         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1250:         const PetscInt    row[]  = {FieldIndex(PrmNode, DMDALocalIndex2D(info, i, j), h)};
1251:         const PetscInt    cols[] = {FieldIndex(Node, DMDALocalIndex3D(info, i - 1, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i + 1, j, k), u),
1252:                                     FieldIndex(Node, DMDALocalIndex3D(info, i, j - 1, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j + 1, k), v)};
1253:         const PetscScalar w = (k && k < zm - 1) ? 0.5 : 0.25, hW = w * (x2[i - 1][j].h + x2[i][j].h) / (zm - 1.), hE = w * (x2[i][j].h + x2[i + 1][j].h) / (zm - 1.), hS = w * (x2[i][j - 1].h + x2[i][j].h) / (zm - 1.),
1254:                           hN = w * (x2[i][j].h + x2[i][j + 1].h) / (zm - 1.);
1255:         PetscScalar *vals, vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0), ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW), ((PetscRealPart(x3[i][j][k].u) > 0) ? 0 : +hE),
1256:                                             ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0), ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS), ((PetscRealPart(x3[i][j][k].v) > 0) ? 0 : +hN)},
1257:                            vals_centered[] = {-0.5 * hW, 0.5 * (-hW + hE), 0.5 * hE, -0.5 * hS, 0.5 * (-hS + hN), 0.5 * hN};
1258:         vals                               = 1 ? vals_upwind : vals_centered;
1259:         if (k == 0) {
1260:           Node derate;
1261:           THIErosion(thi, &x3[i][j][0], NULL, &derate);
1262:           vals[1] -= derate.u;
1263:           vals[4] -= derate.v;
1264:         }
1265:         MatSetValuesLocal(B21, 1, row, 6, cols, vals, INSERT_VALUES);
1266:       }
1267:     }
1268:   }
1269:   return 0;
1270: }

1272: static PetscErrorCode THIJacobian(TS ts, PetscReal t, Vec X, Vec Xdot, PetscReal a, Mat A, Mat B, void *ctx)
1273: {
1274:   THI             thi = (THI)ctx;
1275:   DM              pack, da3, da2;
1276:   Vec             X3, X2, Xdot2;
1277:   Mat             B11, B12, B21, B22;
1278:   DMDALocalInfo   info3;
1279:   IS             *isloc;
1280:   const Node   ***x3;
1281:   const PrmNode **x2, **xdot2;

1284:   TSGetDM(ts, &pack);
1285:   DMCompositeGetEntries(pack, &da3, &da2);
1286:   DMDAGetLocalInfo(da3, &info3);
1287:   DMCompositeGetLocalVectors(pack, &X3, &X2);
1288:   DMCompositeGetLocalVectors(pack, NULL, &Xdot2);
1289:   DMCompositeScatter(pack, X, X3, X2);
1290:   THIFixGhosts(thi, da3, da2, X3, X2);
1291:   DMCompositeScatter(pack, Xdot, NULL, Xdot2);

1293:   MatZeroEntries(B);

1295:   DMCompositeGetLocalISs(pack, &isloc);
1296:   MatGetLocalSubMatrix(B, isloc[0], isloc[0], &B11);
1297:   MatGetLocalSubMatrix(B, isloc[0], isloc[1], &B12);
1298:   MatGetLocalSubMatrix(B, isloc[1], isloc[0], &B21);
1299:   MatGetLocalSubMatrix(B, isloc[1], isloc[1], &B22);

1301:   DMDAVecGetArray(da3, X3, &x3);
1302:   DMDAVecGetArray(da2, X2, &x2);
1303:   DMDAVecGetArray(da2, Xdot2, &xdot2);

1305:   THIJacobianLocal_Momentum(&info3, x3, x2, B11, B12, thi);

1307:   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1308:   MatAssemblyBegin(B, MAT_FLUSH_ASSEMBLY);
1309:   MatAssemblyEnd(B, MAT_FLUSH_ASSEMBLY);

1311:   THIJacobianLocal_2D(&info3, x3, x2, xdot2, a, B22, B21, thi);

1313:   DMDAVecRestoreArray(da3, X3, &x3);
1314:   DMDAVecRestoreArray(da2, X2, &x2);
1315:   DMDAVecRestoreArray(da2, Xdot2, &xdot2);

1317:   MatRestoreLocalSubMatrix(B, isloc[0], isloc[0], &B11);
1318:   MatRestoreLocalSubMatrix(B, isloc[0], isloc[1], &B12);
1319:   MatRestoreLocalSubMatrix(B, isloc[1], isloc[0], &B21);
1320:   MatRestoreLocalSubMatrix(B, isloc[1], isloc[1], &B22);
1321:   ISDestroy(&isloc[0]);
1322:   ISDestroy(&isloc[1]);
1323:   PetscFree(isloc);

1325:   DMCompositeRestoreLocalVectors(pack, &X3, &X2);
1326:   DMCompositeRestoreLocalVectors(pack, 0, &Xdot2);

1328:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
1329:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
1330:   if (A != B) {
1331:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
1332:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
1333:   }
1334:   if (thi->verbose) THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD);
1335:   return 0;
1336: }

1338: /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1339:  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1340:  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1341:  */
1342: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM pack, Vec X, const char filename[], const char filename2[])
1343: {
1344:   const PetscInt dof = NODE_SIZE, dof2 = PRMNODE_SIZE;
1345:   Units          units = thi->units;
1346:   MPI_Comm       comm;
1347:   PetscViewer    viewer3, viewer2;
1348:   PetscMPIInt    rank, size, tag, nn, nmax, nn2, nmax2;
1349:   PetscInt       mx, my, mz, r, range[6];
1350:   PetscScalar   *x, *x2;
1351:   DM             da3, da2;
1352:   Vec            X3, X2;

1355:   PetscObjectGetComm((PetscObject)thi, &comm);
1356:   DMCompositeGetEntries(pack, &da3, &da2);
1357:   DMCompositeGetAccess(pack, X, &X3, &X2);
1358:   DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1359:   MPI_Comm_size(comm, &size);
1360:   MPI_Comm_rank(comm, &rank);
1361:   PetscViewerASCIIOpen(comm, filename, &viewer3);
1362:   PetscViewerASCIIOpen(comm, filename2, &viewer2);
1363:   PetscViewerASCIIPrintf(viewer3, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1364:   PetscViewerASCIIPrintf(viewer2, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1365:   PetscViewerASCIIPrintf(viewer3, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1);
1366:   PetscViewerASCIIPrintf(viewer2, "  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, 0, 0, my - 1, 0, mx - 1);

1368:   DMDAGetCorners(da3, range, range + 1, range + 2, range + 3, range + 4, range + 5);
1369:   PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn);
1370:   MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm);
1371:   PetscMPIIntCast(range[4] * range[5] * dof2, &nn2);
1372:   MPI_Reduce(&nn2, &nmax2, 1, MPI_INT, MPI_MAX, 0, comm);
1373:   tag = ((PetscObject)viewer3)->tag;
1374:   VecGetArrayRead(X3, (const PetscScalar **)&x);
1375:   VecGetArrayRead(X2, (const PetscScalar **)&x2);
1376:   if (rank == 0) {
1377:     PetscScalar *array, *array2;
1378:     PetscMalloc2(nmax, &array, nmax2, &array2);
1379:     for (r = 0; r < size; r++) {
1380:       PetscInt i, j, k, f, xs, xm, ys, ym, zs, zm;
1381:       Node    *y3;
1382:       PetscScalar(*y2)[PRMNODE_SIZE];
1383:       MPI_Status status;

1385:       if (r) MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE);
1386:       zs = range[0];
1387:       ys = range[1];
1388:       xs = range[2];
1389:       zm = range[3];
1390:       ym = range[4];
1391:       xm = range[5];
1393:       if (r) {
1394:         MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status);
1395:         MPI_Get_count(&status, MPIU_SCALAR, &nn);
1397:         y3 = (Node *)array;
1398:         MPI_Recv(array2, nmax2, MPIU_SCALAR, r, tag, comm, &status);
1399:         MPI_Get_count(&status, MPIU_SCALAR, &nn2);
1401:         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1402:       } else {
1403:         y3 = (Node *)x;
1404:         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1405:       }
1406:       PetscViewerASCIIPrintf(viewer3, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1);
1407:       PetscViewerASCIIPrintf(viewer2, "    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", 0, 0, ys, ys + ym - 1, xs, xs + xm - 1);

1409:       PetscViewerASCIIPrintf(viewer3, "      <Points>\n");
1410:       PetscViewerASCIIPrintf(viewer2, "      <Points>\n");
1411:       PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1412:       PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1413:       for (i = xs; i < xs + xm; i++) {
1414:         for (j = ys; j < ys + ym; j++) {
1415:           PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, b = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, b)]), h = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, h)]);
1416:           for (k = zs; k < zs + zm; k++) {
1417:             PetscReal zz = b + h * k / (mz - 1.);
1418:             PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)xx, (double)yy, (double)zz);
1419:           }
1420:           PetscViewerASCIIPrintf(viewer2, "%f %f %f\n", (double)xx, (double)yy, (double)0.0);
1421:         }
1422:       }
1423:       PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n");
1424:       PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n");
1425:       PetscViewerASCIIPrintf(viewer3, "      </Points>\n");
1426:       PetscViewerASCIIPrintf(viewer2, "      </Points>\n");

1428:       { /* Velocity and rank (3D) */
1429:         PetscViewerASCIIPrintf(viewer3, "      <PointData>\n");
1430:         PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1431:         for (i = 0; i < nn / dof; i++) PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)(PetscRealPart(y3[i].u) * units->year / units->meter), (double)(PetscRealPart(y3[i].v) * units->year / units->meter), 0.0);
1432:         PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n");

1434:         PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1435:         for (i = 0; i < nn; i += dof) PetscViewerASCIIPrintf(viewer3, "%" PetscInt_FMT "\n", r);
1436:         PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n");
1437:         PetscViewerASCIIPrintf(viewer3, "      </PointData>\n");
1438:       }

1440:       { /* 2D */
1441:         PetscViewerASCIIPrintf(viewer2, "      <PointData>\n");
1442:         for (f = 0; f < PRMNODE_SIZE; f++) {
1443:           const char *fieldname;
1444:           DMDAGetFieldName(da2, f, &fieldname);
1445:           PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n", fieldname);
1446:           for (i = 0; i < nn2 / PRMNODE_SIZE; i++) PetscViewerASCIIPrintf(viewer2, "%g\n", (double)y2[i][f]);
1447:           PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n");
1448:         }
1449:         PetscViewerASCIIPrintf(viewer2, "      </PointData>\n");
1450:       }

1452:       PetscViewerASCIIPrintf(viewer3, "    </Piece>\n");
1453:       PetscViewerASCIIPrintf(viewer2, "    </Piece>\n");
1454:     }
1455:     PetscFree2(array, array2);
1456:   } else {
1457:     MPI_Send(range, 6, MPIU_INT, 0, tag, comm);
1458:     MPI_Send(x, nn, MPIU_SCALAR, 0, tag, comm);
1459:     MPI_Send(x2, nn2, MPIU_SCALAR, 0, tag, comm);
1460:   }
1461:   VecRestoreArrayRead(X3, (const PetscScalar **)&x);
1462:   VecRestoreArrayRead(X2, (const PetscScalar **)&x2);
1463:   PetscViewerASCIIPrintf(viewer3, "  </StructuredGrid>\n");
1464:   PetscViewerASCIIPrintf(viewer2, "  </StructuredGrid>\n");

1466:   DMCompositeRestoreAccess(pack, X, &X3, &X2);
1467:   PetscViewerASCIIPrintf(viewer3, "</VTKFile>\n");
1468:   PetscViewerASCIIPrintf(viewer2, "</VTKFile>\n");
1469:   PetscViewerDestroy(&viewer3);
1470:   PetscViewerDestroy(&viewer2);
1471:   return 0;
1472: }

1474: static PetscErrorCode THITSMonitor(TS ts, PetscInt step, PetscReal t, Vec X, void *ctx)
1475: {
1476:   THI  thi = (THI)ctx;
1477:   DM   pack;
1478:   char filename3[PETSC_MAX_PATH_LEN], filename2[PETSC_MAX_PATH_LEN];

1481:   if (step < 0) return 0; /* negative one is used to indicate an interpolated solution */
1482:   PetscPrintf(PetscObjectComm((PetscObject)ts), "%3" PetscInt_FMT ": t=%g\n", step, (double)t);
1483:   if (thi->monitor_interval && step % thi->monitor_interval) return 0;
1484:   TSGetDM(ts, &pack);
1485:   PetscSNPrintf(filename3, sizeof(filename3), "%s-3d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step);
1486:   PetscSNPrintf(filename2, sizeof(filename2), "%s-2d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step);
1487:   THIDAVecView_VTK_XML(thi, pack, X, filename3, filename2);
1488:   return 0;
1489: }

1491: static PetscErrorCode THICreateDM3d(THI thi, DM *dm3d)
1492: {
1493:   MPI_Comm comm;
1494:   PetscInt M = 3, N = 3, P = 2;
1495:   DM       da;

1498:   PetscObjectGetComm((PetscObject)thi, &comm);
1499:   PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1500:   {
1501:     PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL);
1502:     N = M;
1503:     PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL);
1504:     PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL);
1505:   }
1506:   PetscOptionsEnd();
1507:   DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da);
1508:   DMSetFromOptions(da);
1509:   DMSetUp(da);
1510:   DMDASetFieldName(da, 0, "x-velocity");
1511:   DMDASetFieldName(da, 1, "y-velocity");
1512:   *dm3d = da;
1513:   return 0;
1514: }

1516: int main(int argc, char *argv[])
1517: {
1518:   MPI_Comm  comm;
1519:   DM        pack, da3, da2;
1520:   TS        ts;
1521:   THI       thi;
1522:   Vec       X;
1523:   Mat       B;
1524:   PetscInt  i, steps;
1525:   PetscReal ftime;

1528:   PetscInitialize(&argc, &argv, 0, help);
1529:   comm = PETSC_COMM_WORLD;

1531:   THICreate(comm, &thi);
1532:   THICreateDM3d(thi, &da3);
1533:   {
1534:     PetscInt        Mx, My, mx, my, s;
1535:     DMDAStencilType st;
1536:     DMDAGetInfo(da3, 0, 0, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st);
1537:     DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2);
1538:     DMSetUp(da2);
1539:   }

1541:   PetscObjectSetName((PetscObject)da3, "3D_Velocity");
1542:   DMSetOptionsPrefix(da3, "f3d_");
1543:   DMDASetFieldName(da3, 0, "u");
1544:   DMDASetFieldName(da3, 1, "v");
1545:   PetscObjectSetName((PetscObject)da2, "2D_Fields");
1546:   DMSetOptionsPrefix(da2, "f2d_");
1547:   DMDASetFieldName(da2, 0, "b");
1548:   DMDASetFieldName(da2, 1, "h");
1549:   DMDASetFieldName(da2, 2, "beta2");
1550:   DMCompositeCreate(comm, &pack);
1551:   DMCompositeAddDM(pack, da3);
1552:   DMCompositeAddDM(pack, da2);
1553:   DMDestroy(&da3);
1554:   DMDestroy(&da2);
1555:   DMSetUp(pack);
1556:   DMCreateMatrix(pack, &B);
1557:   MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
1558:   MatSetOptionsPrefix(B, "thi_");

1560:   for (i = 0; i < thi->nlevels; i++) {
1561:     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
1562:     PetscInt  Mx, My, Mz;
1563:     DMCompositeGetEntries(pack, &da3, &da2);
1564:     DMDAGetInfo(da3, 0, &Mz, &My, &Mx, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1565:     PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n", i, Lx, Ly, Lz, Mx, My, Mz, Mx * My * Mz, Lx / Mx, Ly / My, 1000. / (Mz - 1));
1566:   }

1568:   DMCreateGlobalVector(pack, &X);
1569:   THIInitial(thi, pack, X);

1571:   TSCreate(comm, &ts);
1572:   TSSetDM(ts, pack);
1573:   TSSetProblemType(ts, TS_NONLINEAR);
1574:   TSMonitorSet(ts, THITSMonitor, thi, NULL);
1575:   TSSetType(ts, TSTHETA);
1576:   TSSetIFunction(ts, NULL, THIFunction, thi);
1577:   TSSetIJacobian(ts, B, B, THIJacobian, thi);
1578:   TSSetMaxTime(ts, 10.0);
1579:   TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER);
1580:   TSSetSolution(ts, X);
1581:   TSSetTimeStep(ts, 1e-3);
1582:   TSSetFromOptions(ts);

1584:   TSSolve(ts, X);
1585:   TSGetSolveTime(ts, &ftime);
1586:   TSGetStepNumber(ts, &steps);
1587:   PetscPrintf(PETSC_COMM_WORLD, "Steps %" PetscInt_FMT "  final time %g\n", steps, (double)ftime);

1589:   if (0) THISolveStatistics(thi, ts, 0, "Full");

1591:   {
1592:     PetscBool flg;
1593:     char      filename[PETSC_MAX_PATH_LEN] = "";
1594:     PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg);
1595:     if (flg) THIDAVecView_VTK_XML(thi, pack, X, filename, NULL);
1596:   }

1598:   VecDestroy(&X);
1599:   MatDestroy(&B);
1600:   DMDestroy(&pack);
1601:   TSDestroy(&ts);
1602:   THIDestroy(&thi);
1603:   PetscFinalize();
1604:   return 0;
1605: }