Actual source code: ex14.c

  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponding\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1); details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";
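
/* A representative invocation (illustrative only: the executable name, launcher, and any
   grid/solver options are assumptions that depend on the build and on options registered
   further down in this file):

     mpiexec -n 2 ./ex14 -thi_hom C -thi_nlevels 2 -thi_monitor thi- -o out.vts
*/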

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip or Navier (linear) slip with a spatially varying friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: */
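
/* Worked example of the rheology above (reference only, not used by the solver): with
   Glen exponent n = 3 the Stokes power is p = 1 + 1/n = 4/3, so

     eta         = B/2 (epsilon + gamma)^((p-2)/2) = B/2 (epsilon + gamma)^(-1/3)
     deta/dgamma = ((p-2)/2) * eta / (epsilon + gamma)

   which is what THIViscosity() below computes, with Bd2 = B/2 and exponent = (p-2)/2. */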

 46: #include <petscts.h>
 47: #include <petscdm.h>
 48: #include <petscdmda.h>
 49: #include <petscdmcomposite.h>
 50: #include <ctype.h> /* toupper() */
 51: #include <petsc/private/petscimpl.h>

 53: #if defined __SSE2__
 54:   #include <emmintrin.h>
 55: #endif

 57: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 58: #define USE_SSE2_KERNELS (!defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && defined __SSE2__)

 60: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
 61:   #if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
 62:     #define restrict
 63:   #else
 64:     #define restrict PETSC_RESTRICT
 65:   #endif
 66: #endif

 68: static PetscClassId THI_CLASSID;

 70: typedef enum {
 71:   QUAD_GAUSS,
 72:   QUAD_LOBATTO
 73: } QuadratureType;
 74: static const char     *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
 75: static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
 76: static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
 77: #define G 0.57735026918962573
 78: #define H (0.5 * (1. + G))
 79: #define L (0.5 * (1. - G))
 80: #define M (-0.5)
 81: #define P (0.5)
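/* G is the 1D Gauss point 1/sqrt(3); H = (1+G)/2 and L = (1-G)/2 are the values of the two
   1D linear basis functions at that point, and P = +1/2, M = -1/2 are their derivatives on
   the reference interval [-1,1].  The interpolation and derivative tables below are built
   from these values. */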
 82: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
 83: static const PetscReal HexQInterp_Lobatto[8][8] = {
 84:   {H, 0, 0, 0, L, 0, 0, 0},
 85:   {0, H, 0, 0, 0, L, 0, 0},
 86:   {0, 0, H, 0, 0, 0, L, 0},
 87:   {0, 0, 0, H, 0, 0, 0, L},
 88:   {L, 0, 0, 0, H, 0, 0, 0},
 89:   {0, L, 0, 0, 0, H, 0, 0},
 90:   {0, 0, L, 0, 0, 0, H, 0},
 91:   {0, 0, 0, L, 0, 0, 0, H}
 92: };
 93: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
 94:   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
 95:   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
 96:   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
 97:   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
 98:   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
 99:   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
100:   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
101:   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
102: };
103: /* Standard Gauss */
104: static const PetscReal HexQInterp_Gauss[8][8] = {
105:   {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
106:   {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
107:   {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
108:   {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
109:   {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
110:   {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
111:   {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
112:   {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
113: };
114: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
115:   {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
116:   {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
117:   {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
118:   {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
119:   {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
120:   {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
121:   {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
122:   {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
123: };
124: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
125: /* Standard 2x2 Gauss quadrature for the bottom layer. */
126: static const PetscReal QuadQInterp[4][4] = {
127:   {H * H, L *H, L *L, H *L},
128:   {L * H, H *H, H *L, L *L},
129:   {L * L, H *L, H *H, L *H},
130:   {H * L, L *L, L *H, H *H}
131: };
132: static const PetscReal QuadQDeriv[4][4][2] = {
133:   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
134:   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
135:   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
136:   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
137: };
138: #undef G
139: #undef H
140: #undef L
141: #undef M
142: #undef P

144: #define HexExtract(x, i, j, k, n) \
145:   do { \
146:     (n)[0] = (x)[i][j][k]; \
147:     (n)[1] = (x)[i + 1][j][k]; \
148:     (n)[2] = (x)[i + 1][j + 1][k]; \
149:     (n)[3] = (x)[i][j + 1][k]; \
150:     (n)[4] = (x)[i][j][k + 1]; \
151:     (n)[5] = (x)[i + 1][j][k + 1]; \
152:     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
153:     (n)[7] = (x)[i][j + 1][k + 1]; \
154:   } while (0)

156: #define HexExtractRef(x, i, j, k, n) \
157:   do { \
158:     (n)[0] = &(x)[i][j][k]; \
159:     (n)[1] = &(x)[i + 1][j][k]; \
160:     (n)[2] = &(x)[i + 1][j + 1][k]; \
161:     (n)[3] = &(x)[i][j + 1][k]; \
162:     (n)[4] = &(x)[i][j][k + 1]; \
163:     (n)[5] = &(x)[i + 1][j][k + 1]; \
164:     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
165:     (n)[7] = &(x)[i][j + 1][k + 1]; \
166:   } while (0)

168: #define QuadExtract(x, i, j, n) \
169:   do { \
170:     (n)[0] = (x)[i][j]; \
171:     (n)[1] = (x)[i + 1][j]; \
172:     (n)[2] = (x)[i + 1][j + 1]; \
173:     (n)[3] = (x)[i][j + 1]; \
174:   } while (0)

176: static PetscScalar Sqr(PetscScalar a)
177: {
178:   return a * a;
179: }

181: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
182: {
183:   PetscInt i;
184:   dz[0] = dz[1] = dz[2] = 0;
185:   for (i = 0; i < 8; i++) {
186:     dz[0] += dphi[i][0] * zn[i];
187:     dz[1] += dphi[i][1] * zn[i];
188:     dz[2] += dphi[i][2] * zn[i];
189:   }
190: }

192: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[restrict], PetscReal phi[restrict], PetscReal dphi[restrict][3], PetscReal *restrict jw)
193: {
194:   const PetscReal jac[3][3] =
195:     {
196:       {hx / 2, 0,      0    },
197:       {0,      hy / 2, 0    },
198:       {dz[0],  dz[1],  dz[2]}
199:   },
200:                   ijac[3][3] = {{1 / jac[0][0], 0, 0}, {0, 1 / jac[1][1], 0}, {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}}, jdet = jac[0][0] * jac[1][1] * jac[2][2];
201:   PetscInt i;

203:   for (i = 0; i < 8; i++) {
204:     const PetscReal *dphir = HexQDeriv[q][i];
205:     phi[i]                 = HexQInterp[q][i];
206:     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
207:     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
208:     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
209:   }
210:   *jw = 1.0 * jdet;
211: }

213: typedef struct _p_THI   *THI;
214: typedef struct _n_Units *Units;

216: typedef struct {
217:   PetscScalar u, v;
218: } Node;

220: typedef struct {
221:   PetscScalar b;     /* bed */
222:   PetscScalar h;     /* thickness */
223:   PetscScalar beta2; /* friction */
224: } PrmNode;

226: #define FieldSize(ntype)             ((PetscInt)(sizeof(ntype) / sizeof(PetscScalar)))
227: #define FieldOffset(ntype, member)   ((PetscInt)(offsetof(ntype, member) / sizeof(PetscScalar)))
228: #define FieldIndex(ntype, i, member) ((PetscInt)((i) * FieldSize(ntype) + FieldOffset(ntype, member)))
229: #define NODE_SIZE                    FieldSize(Node)
230: #define PRMNODE_SIZE                 FieldSize(PrmNode)
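/* With the structs above: NODE_SIZE = 2 (u,v) and PRMNODE_SIZE = 3 (b,h,beta2). */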

232: typedef struct {
233:   PetscReal min, max, cmin, cmax;
234: } PRange;

236: struct _p_THI {
237:   PETSCHEADER(int);
238:   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
239:   PetscInt  nlevels;
240:   PetscInt  zlevels;
241:   PetscReal Lx, Ly, Lz; /* Model domain */
242:   PetscReal alpha;      /* Bed angle */
243:   Units     units;
244:   PetscReal dirichlet_scale;
245:   PetscReal ssa_friction_scale;
246:   PetscReal inertia;
247:   PRange    eta;
248:   PRange    beta2;
249:   struct {
250:     PetscReal Bd2, eps, exponent, glen_n;
251:   } viscosity;
252:   struct {
253:     PetscReal irefgam, eps2, exponent;
254:   } friction;
255:   struct {
256:     PetscReal rate, exponent, refvel;
257:   } erosion;
258:   PetscReal rhog;
259:   PetscBool no_slip;
260:   PetscBool verbose;
261:   char     *mattype;
262:   char     *monitor_basename;
263:   PetscInt  monitor_interval;
264: };

266: struct _n_Units {
267:   /* fundamental */
268:   PetscReal meter;
269:   PetscReal kilogram;
270:   PetscReal second;
271:   /* derived */
272:   PetscReal Pascal;
273:   PetscReal year;
274: };

276: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
277: {
278:   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
279:                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
280:   PetscInt          i;
281:   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
282: }

284: /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
285: static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2], PetscReal hx, PetscReal hy, const PrmNode pn[4], PrmNode dp[4][2])
286: {
287:   PetscInt q, i, f;
288:   const PetscScalar(*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
289:   PetscScalar(*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;

291:   PetscFunctionBeginUser;
292:   PetscCall(PetscArrayzero(dpg, 4));
293:   for (q = 0; q < 4; q++) {
294:     for (i = 0; i < 4; i++) {
295:       for (f = 0; f < PRMNODE_SIZE; f++) {
296:         dpg[q][0][f] += dphi[q][i][0] / hx * pg[i][f];
297:         dpg[q][1][f] += dphi[q][i][1] / hy * pg[i][f];
298:       }
299:     }
300:   }
301:   PetscFunctionReturn(PETSC_SUCCESS);
302: }

304: static inline PetscReal StaggeredMidpoint2D(PetscScalar a, PetscScalar b, PetscScalar c, PetscScalar d)
305: {
306:   return 0.5 * PetscRealPart(0.75 * a + 0.75 * b + 0.25 * c + 0.25 * d);
307: }
308: static inline PetscReal UpwindFlux1D(PetscReal u, PetscReal hL, PetscReal hR)
309: {
310:   return (u > 0) ? hL * u : hR * u;
311: }

313: #define UpwindFluxXW(x3, x2, h, i, j, k, dj) \
314:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i - 1][j][k].u, x3[i - 1][j + dj][k].u, x3[i][j + dj][k].u), PetscRealPart(0.75 * x2[i - 1][j].h + 0.25 * x2[i - 1][j + dj].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h))
315: #define UpwindFluxXE(x3, x2, h, i, j, k, dj) \
316:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i + 1][j][k].u, x3[i + 1][j + dj][k].u, x3[i][j + dj][k].u), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h), PetscRealPart(0.75 * x2[i + 1][j].h + 0.25 * x2[i + 1][j + dj].h))
317: #define UpwindFluxYS(x3, x2, h, i, j, k, di) \
318:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j - 1][k].v, x3[i + di][j - 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j - 1].h + 0.25 * x2[i + di][j - 1].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h))
319: #define UpwindFluxYN(x3, x2, h, i, j, k, di) \
320:   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j + 1][k].v, x3[i + di][j + 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h), PetscRealPart(0.75 * x2[i][j + 1].h + 0.25 * x2[i + di][j + 1].h))
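/* The four macros above evaluate the advective thickness flux through the west/east/south/north
   half-faces of cell (i,j): the normal velocity is averaged to the staggered face point and the
   upwind thickness is selected by UpwindFlux1D.  They are used by THIFunctionLocal_2D below to
   assemble the flux divergence in the thickness evolution equation. */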

322: static void PrmNodeGetFaceMeasure(const PrmNode **p, PetscInt i, PetscInt j, PetscScalar h[])
323: {
324:   /* West */
325:   h[0] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j - 1].h, p[i][j - 1].h);
326:   h[1] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j + 1].h, p[i][j + 1].h);
327:   /* East */
328:   h[2] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j + 1].h, p[i][j + 1].h);
329:   h[3] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j - 1].h, p[i][j - 1].h);
330:   /* South */
331:   h[4] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i + 1][j - 1].h, p[i + 1][j].h);
332:   h[5] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i - 1][j - 1].h, p[i - 1][j].h);
333:   /* North */
334:   h[6] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i - 1][j + 1].h, p[i - 1][j].h);
335:   h[7] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i + 1][j + 1].h, p[i + 1][j].h);
336: }

338: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
339: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
340: {
341:   Units     units = thi->units;
342:   PetscReal s     = -x * PetscSinReal(thi->alpha);
343:   p->b            = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
344:   p->h            = s - p->b;
345:   p->beta2        = -1e-10; /* This value is not used, but it should not be huge because that would change the finite difference step size  */
346: }

348: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
349: {
350:   Units     units = thi->units;
351:   PetscReal s     = -x * PetscSinReal(thi->alpha);
352:   p->b            = s - 1000 * units->meter;
353:   p->h            = s - p->b;
354:   /* tau_b = beta2 v   is a stress (Pa).
355:    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
356:   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
357: }

359: /* These are just toys */

361: /* From Fred Herman */
362: static void THIInitialize_HOM_F(THI thi, PetscReal x, PetscReal y, PrmNode *p)
363: {
364:   Units     units = thi->units;
365:   PetscReal s     = -x * PetscSinReal(thi->alpha);
366:   p->b            = s - 1000 * units->meter + 100 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx); /* * sin(y*2*PETSC_PI/thi->Ly); */
367:   p->h            = s - p->b;
368:   p->h            = (1 - (atan((x - thi->Lx / 2) / 1.) + PETSC_PI / 2.) / PETSC_PI) * 500 * units->meter + 1 * units->meter;
369:   s               = PetscRealPart(p->b + p->h);
370:   p->beta2        = -1e-10;
371:   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
372: }

374: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
375: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
376: {
377:   Units     units = thi->units;
378:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
379:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
380:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
381:   p->h     = s - p->b;
382:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
383: }

385: /* Like Z, but with 200 meter cliffs */
386: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
387: {
388:   Units     units = thi->units;
389:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
390:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
391:   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
392:   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
393:   p->h     = s - p->b;
394:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
395: }

397: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
398: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
399: {
400:   Units     units = thi->units;
401:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
402:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
403:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
404:   p->h     = s - p->b;
405:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
406: }

408: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
409: {
410:   if (thi->friction.irefgam == 0) {
411:     Units units           = thi->units;
412:     thi->friction.irefgam = 1. / (0.5 * PetscSqr(100 * units->meter / units->year));
413:     thi->friction.eps2    = 0.5 * PetscSqr(1.e-4 / thi->friction.irefgam);
414:   }
415:   if (thi->friction.exponent == 0) {
416:     *beta2  = rbeta2;
417:     *dbeta2 = 0;
418:   } else {
419:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
420:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
421:   }
422: }

424: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
425: {
426:   PetscReal Bd2, eps, exponent;
427:   if (thi->viscosity.Bd2 == 0) {
428:     Units           units   = thi->units;
429:     const PetscReal n       = thi->viscosity.glen_n,                                  /* Glen exponent */
430:       p                     = 1. + 1. / n,                                            /* for Stokes */
431:       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
432:       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
433:     thi->viscosity.Bd2      = B / 2;
434:     thi->viscosity.exponent = (p - 2) / 2;
435:     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
436:   }
437:   Bd2      = thi->viscosity.Bd2;
438:   exponent = thi->viscosity.exponent;
439:   eps      = thi->viscosity.eps;
440:   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
441:   *deta    = exponent * (*eta) / (eps + gam);
442: }

444: static void THIErosion(THI thi, const Node *vel, PetscScalar *erate, Node *derate)
445: {
446:   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel), rate = -thi->erosion.rate * PetscPowScalar(magref2, 0.5 * thi->erosion.exponent);
447:   if (erate) *erate = rate;
448:   if (derate) {
449:     if (thi->erosion.exponent == 1) {
450:       derate->u = 0;
451:       derate->v = 0;
452:     } else {
453:       derate->u = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
454:       derate->v = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
455:     }
456:   }
457: }

459: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
460: {
461:   if (x < *min) *min = x;
462:   if (x > *max) *max = x;
463: }

465: static void PRangeClear(PRange *p)
466: {
467:   p->cmin = p->min = 1e100;
468:   p->cmax = p->max = -1e100;
469: }

471: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
472: {
473:   PetscFunctionBeginUser;
474:   p->cmin = min;
475:   p->cmax = max;
476:   if (min < p->min) p->min = min;
477:   if (max > p->max) p->max = max;
478:   PetscFunctionReturn(PETSC_SUCCESS);
479: }

481: static PetscErrorCode THIDestroy(THI *thi)
482: {
483:   PetscFunctionBeginUser;
484:   if (--((PetscObject)*thi)->refct > 0) PetscFunctionReturn(PETSC_SUCCESS);
485:   PetscCall(PetscFree((*thi)->units));
486:   PetscCall(PetscFree((*thi)->mattype));
487:   PetscCall(PetscFree((*thi)->monitor_basename));
488:   PetscCall(PetscHeaderDestroy(thi));
489:   PetscFunctionReturn(PETSC_SUCCESS);
490: }

492: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
493: {
494:   static PetscBool registered = PETSC_FALSE;
495:   THI              thi;
496:   Units            units;
497:   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";

499:   PetscFunctionBeginUser;
500:   *inthi = 0;
501:   if (!registered) {
502:     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
503:     registered = PETSC_TRUE;
504:   }
505:   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "THI", comm, THIDestroy, 0));

507:   PetscCall(PetscNew(&thi->units));

509:   units           = thi->units;
510:   units->meter    = 1e-2;
511:   units->second   = 1e-7;
512:   units->kilogram = 1e-12;

514:   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
515:   {
516:     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
517:     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
518:     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
519:   }
520:   PetscOptionsEnd();
521:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
522:   units->year   = 31556926. * units->second; /* seconds per year */

524:   thi->Lx              = 10.e3;
525:   thi->Ly              = 10.e3;
526:   thi->Lz              = 1000;
527:   thi->nlevels         = 1;
528:   thi->dirichlet_scale = 1;
529:   thi->verbose         = PETSC_FALSE;

531:   thi->viscosity.glen_n = 3.;
532:   thi->erosion.rate     = 1e-3; /* m/a */
533:   thi->erosion.exponent = 1.;
534:   thi->erosion.refvel   = 1.; /* m/a */

536:   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
537:   {
538:     QuadratureType quad       = QUAD_GAUSS;
539:     char           homexp[]   = "A";
540:     char           mtype[256] = MATSBAIJ;
541:     PetscReal      L, m = 1.0;
542:     PetscBool      flg;
543:     L = thi->Lx;
544:     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
545:     if (flg) thi->Lx = thi->Ly = L;
546:     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
547:     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
548:     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
549:     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
550:     switch (homexp[0] = toupper(homexp[0])) {
551:     case 'A':
552:       thi->initialize = THIInitialize_HOM_A;
553:       thi->no_slip    = PETSC_TRUE;
554:       thi->alpha      = 0.5;
555:       break;
556:     case 'C':
557:       thi->initialize = THIInitialize_HOM_C;
558:       thi->no_slip    = PETSC_FALSE;
559:       thi->alpha      = 0.1;
560:       break;
561:     case 'F':
562:       thi->initialize = THIInitialize_HOM_F;
563:       thi->no_slip    = PETSC_FALSE;
564:       thi->alpha      = 0.5;
565:       break;
566:     case 'X':
567:       thi->initialize = THIInitialize_HOM_X;
568:       thi->no_slip    = PETSC_FALSE;
569:       thi->alpha      = 0.3;
570:       break;
571:     case 'Y':
572:       thi->initialize = THIInitialize_HOM_Y;
573:       thi->no_slip    = PETSC_FALSE;
574:       thi->alpha      = 0.5;
575:       break;
576:     case 'Z':
577:       thi->initialize = THIInitialize_HOM_Z;
578:       thi->no_slip    = PETSC_FALSE;
579:       thi->alpha      = 0.5;
580:       break;
581:     default:
582:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
583:     }
584:     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
585:     switch (quad) {
586:     case QUAD_GAUSS:
587:       HexQInterp = HexQInterp_Gauss;
588:       HexQDeriv  = HexQDeriv_Gauss;
589:       break;
590:     case QUAD_LOBATTO:
591:       HexQInterp = HexQInterp_Lobatto;
592:       HexQDeriv  = HexQDeriv_Lobatto;
593:       break;
594:     }
595:     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
596:     PetscCall(PetscOptionsReal("-thi_viscosity_glen_n", "Exponent in Glen flow law, 1=linear, infty=ideal plastic", NULL, thi->viscosity.glen_n, &thi->viscosity.glen_n, NULL));
597:     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
598:     thi->friction.exponent = (m - 1) / 2;
599:     PetscCall(PetscOptionsReal("-thi_erosion_rate", "Rate of erosion relative to sliding velocity at reference velocity (m/a)", NULL, thi->erosion.rate, &thi->erosion.rate, NULL));
600:     PetscCall(PetscOptionsReal("-thi_erosion_exponent", "Power of sliding velocity appearing in erosion relation", NULL, thi->erosion.exponent, &thi->erosion.exponent, NULL));
601:     PetscCall(PetscOptionsReal("-thi_erosion_refvel", "Reference sliding velocity for erosion (m/a)", NULL, thi->erosion.refvel, &thi->erosion.refvel, NULL));
602:     thi->erosion.rate *= units->meter / units->year;
603:     thi->erosion.refvel *= units->meter / units->year;
604:     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
605:     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
606:     PetscCall(PetscOptionsReal("-thi_inertia", "Coefficient of acceleration term in velocity system, physical is almost zero", NULL, thi->inertia, &thi->inertia, NULL));
607:     PetscCall(PetscOptionsInt("-thi_nlevels", "Number of levels of refinement", "", thi->nlevels, &thi->nlevels, NULL));
608:     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
609:     PetscCall(PetscStrallocpy(mtype, &thi->mattype));
610:     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
611:     PetscCall(PetscOptionsString("-thi_monitor", "Basename to write state files to", NULL, monitor_basename, monitor_basename, sizeof(monitor_basename), &flg));
612:     if (flg) {
613:       PetscCall(PetscStrallocpy(monitor_basename, &thi->monitor_basename));
614:       thi->monitor_interval = 1;
615:       PetscCall(PetscOptionsInt("-thi_monitor_interval", "Frequency at which to write state files", NULL, thi->monitor_interval, &thi->monitor_interval, NULL));
616:     }
617:   }
618:   PetscOptionsEnd();

620:   /* dimensionalize */
621:   thi->Lx *= units->meter;
622:   thi->Ly *= units->meter;
623:   thi->Lz *= units->meter;
624:   thi->alpha *= PETSC_PI / 180;

626:   PRangeClear(&thi->eta);
627:   PRangeClear(&thi->beta2);

629:   {
630:     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowRealInt(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
631:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
632:     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
633:     thi->rhog = rho * grav;
634:     if (thi->verbose) {
635:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
636:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
637:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
638:       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
639:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
640:     }
641:   }

643:   *inthi = thi;
644:   PetscFunctionReturn(PETSC_SUCCESS);
645: }

647: /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
648:  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
649:  * the horizontal. */
650: static PetscErrorCode THIFixGhosts(THI thi, DM da3, DM da2, Vec X3, Vec X2)
651: {
652:   DMDALocalInfo info;
653:   PrmNode     **x2;
654:   PetscInt      i, j;

656:   PetscFunctionBeginUser;
657:   PetscCall(DMDAGetLocalInfo(da3, &info));
658:   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
659:   PetscCall(DMDAVecGetArray(da2, X2, &x2));
660:   for (i = info.gzs; i < info.gzs + info.gzm; i++) {
661:     if (i > -1 && i < info.mz) continue;
662:     for (j = info.gys; j < info.gys + info.gym; j++) x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i < 0 ? 1.0 : -1.0);
663:   }
664:   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
665:   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
666:   PetscFunctionReturn(PETSC_SUCCESS);
667: }

669: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, PrmNode **p)
670: {
671:   PetscInt i, j, xs, xm, ys, ym, mx, my;

673:   PetscFunctionBeginUser;
674:   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
675:   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
676:   for (i = xs; i < xs + xm; i++) {
677:     for (j = ys; j < ys + ym; j++) {
678:       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
679:       thi->initialize(thi, xx, yy, &p[i][j]);
680:     }
681:   }
682:   PetscFunctionReturn(PETSC_SUCCESS);
683: }

685: static PetscErrorCode THIInitial(THI thi, DM pack, Vec X)
686: {
687:   DM        da3, da2;
688:   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
689:   PetscReal hx, hy;
690:   PrmNode **prm;
691:   Node   ***x;
692:   Vec       X3g, X2g, X2;

694:   PetscFunctionBeginUser;
695:   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
696:   PetscCall(DMCompositeGetAccess(pack, X, &X3g, &X2g));
697:   PetscCall(DMGetLocalVector(da2, &X2));

699:   PetscCall(DMDAGetInfo(da3, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
700:   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
701:   PetscCall(DMDAVecGetArray(da3, X3g, &x));
702:   PetscCall(DMDAVecGetArray(da2, X2, &prm));

704:   PetscCall(THIInitializePrm(thi, da2, prm));

706:   hx = thi->Lx / mx;
707:   hy = thi->Ly / my;
708:   for (i = xs; i < xs + xm; i++) {
709:     for (j = ys; j < ys + ym; j++) {
710:       for (k = zs; k < zs + zm; k++) {
711:         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
712:         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
713:         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
714:       }
715:     }
716:   }

718:   PetscCall(DMDAVecRestoreArray(da3, X3g, &x));
719:   PetscCall(DMDAVecRestoreArray(da2, X2, &prm));

721:   PetscCall(DMLocalToGlobalBegin(da2, X2, INSERT_VALUES, X2g));
722:   PetscCall(DMLocalToGlobalEnd(da2, X2, INSERT_VALUES, X2g));
723:   PetscCall(DMRestoreLocalVector(da2, &X2));

725:   PetscCall(DMCompositeRestoreAccess(pack, X, &X3g, &X2g));
726:   PetscFunctionReturn(PETSC_SUCCESS);
727: }

729: static void PointwiseNonlinearity(THI thi, const Node n[restrict 8], const PetscReal phi[restrict 8], PetscReal dphi[restrict 8][3], PetscScalar *restrict u, PetscScalar *restrict v, PetscScalar du[restrict 3], PetscScalar dv[restrict 3], PetscReal *eta, PetscReal *deta)
730: {
731:   PetscInt    l, ll;
732:   PetscScalar gam;

734:   du[0] = du[1] = du[2] = 0;
735:   dv[0] = dv[1] = dv[2] = 0;
736:   *u                    = 0;
737:   *v                    = 0;
738:   for (l = 0; l < 8; l++) {
739:     *u += phi[l] * n[l].u;
740:     *v += phi[l] * n[l].v;
741:     for (ll = 0; ll < 3; ll++) {
742:       du[ll] += dphi[l][ll] * n[l].u;
743:       dv[ll] += dphi[l][ll] * n[l].v;
744:     }
745:   }
746:   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0] * dv[1] + 0.25 * Sqr(du[1] + dv[0]) + 0.25 * Sqr(du[2]) + 0.25 * Sqr(dv[2]);
747:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
748: }

750: static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const Node ***xdot, Node ***f, THI thi)
751: {
752:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
753:   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;

755:   PetscFunctionBeginUser;
756:   xs = info->zs;
757:   ys = info->ys;
758:   xm = info->zm;
759:   ym = info->ym;
760:   zm = info->xm;
761:   hx = thi->Lx / info->mz;
762:   hy = thi->Ly / info->my;

764:   etamin   = 1e100;
765:   etamax   = 0;
766:   beta2min = 1e100;
767:   beta2max = 0;

769:   for (i = xs; i < xs + xm; i++) {
770:     for (j = ys; j < ys + ym; j++) {
771:       PrmNode pn[4], dpn[4][2];
772:       QuadExtract(prm, i, j, pn);
773:       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
774:       for (k = 0; k < zm - 1; k++) {
775:         PetscInt  ls = 0;
776:         Node      n[8], ndot[8], *fn[8];
777:         PetscReal zn[8], etabase = 0;

779:         PrmHexGetZ(pn, k, zm, zn);
780:         HexExtract(x, i, j, k, n);
781:         HexExtract(xdot, i, j, k, ndot);
782:         HexExtractRef(f, i, j, k, fn);
783:         if (thi->no_slip && k == 0) {
784:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
785:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
786:           ls = 4;
787:         }
788:         for (q = 0; q < 8; q++) {
789:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
790:           PetscScalar du[3], dv[3], u, v, udot = 0, vdot = 0;
791:           for (l = ls; l < 8; l++) {
792:             udot += HexQInterp[q][l] * ndot[l].u;
793:             vdot += HexQInterp[q][l] * ndot[l].v;
794:           }
795:           HexGrad(HexQDeriv[q], zn, dz);
796:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
797:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
798:           jw /= thi->rhog; /* scales residuals to be O(1) */
799:           if (q == 0) etabase = eta;
800:           RangeUpdate(&etamin, &etamax, eta);
801:           for (l = ls; l < 8; l++) { /* test functions */
802:             const PetscScalar ds[2] = {dpn[q % 4][0].h + dpn[q % 4][0].b, dpn[q % 4][1].h + dpn[q % 4][1].b};
803:             const PetscReal   pp = phi[l], *dp = dphi[l];
804:             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
805:             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
806:             fn[l]->u += pp * jw * udot * thi->inertia * pp;
807:             fn[l]->v += pp * jw * vdot * thi->inertia * pp;
808:           }
809:         }
810:         if (k == 0) { /* we are on a bottom face */
811:           if (thi->no_slip) {
812:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
813:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
814:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
815:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
816:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
817:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
818:             * assembled matrix (see the similar block in THIJacobianLocal).
819:             *
820:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
821:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
822:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
823:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
824:             */
825:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
826:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
827:             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
828:             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
829:           } else {                    /* Integrate over bottom face to apply boundary condition */
830:             for (q = 0; q < 4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
831:               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
832:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
833:               PetscReal       beta2, dbeta2;
834:               for (l = 0; l < 4; l++) {
835:                 u += phi[l] * n[l].u;
836:                 v += phi[l] * n[l].v;
837:                 rbeta2 += phi[l] * pn[l].beta2;
838:               }
839:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
840:               RangeUpdate(&beta2min, &beta2max, beta2);
841:               for (l = 0; l < 4; l++) {
842:                 const PetscReal pp = phi[l];
843:                 fn[ls + l]->u += pp * jw * beta2 * u;
844:                 fn[ls + l]->v += pp * jw * beta2 * v;
845:               }
846:             }
847:           }
848:         }
849:       }
850:     }
851:   }

853:   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
854:   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
855:   PetscFunctionReturn(PETSC_SUCCESS);
856: }

858: static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const PrmNode **prmdot, PrmNode **f, THI thi)
859: {
860:   PetscInt xs, ys, xm, ym, zm, i, j, k;

862:   PetscFunctionBeginUser;
863:   xs = info->zs;
864:   ys = info->ys;
865:   xm = info->zm;
866:   ym = info->ym;
867:   zm = info->xm;

869:   for (i = xs; i < xs + xm; i++) {
870:     for (j = ys; j < ys + ym; j++) {
871:       PetscScalar div = 0, erate, h[8];
872:       PrmNodeGetFaceMeasure(prm, i, j, h);
873:       for (k = 0; k < zm; k++) {
874:         PetscScalar weight = (k == 0 || k == zm - 1) ? 0.5 / (zm - 1) : 1.0 / (zm - 1);
875:         if (0) { /* centered flux */
876:           div += (-weight * h[0] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j - 1][k].u, x[i][j - 1][k].u) - weight * h[1] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j + 1][k].u, x[i][j + 1][k].u) +
877:                   weight * h[2] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j + 1][k].u, x[i][j + 1][k].u) + weight * h[3] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j - 1][k].u, x[i][j - 1][k].u) -
878:                   weight * h[4] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i + 1][j - 1][k].v, x[i + 1][j][k].v) - weight * h[5] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i - 1][j - 1][k].v, x[i - 1][j][k].v) +
879:                   weight * h[6] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i - 1][j + 1][k].v, x[i - 1][j][k].v) + weight * h[7] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i + 1][j + 1][k].v, x[i + 1][j][k].v));
880:         } else { /* Upwind flux */
881:           div += weight * (-UpwindFluxXW(x, prm, h, i, j, k, 1) - UpwindFluxXW(x, prm, h, i, j, k, -1) + UpwindFluxXE(x, prm, h, i, j, k, 1) + UpwindFluxXE(x, prm, h, i, j, k, -1) - UpwindFluxYS(x, prm, h, i, j, k, 1) - UpwindFluxYS(x, prm, h, i, j, k, -1) + UpwindFluxYN(x, prm, h, i, j, k, 1) + UpwindFluxYN(x, prm, h, i, j, k, -1));
882:         }
883:       }
884:       /* printf("div[%d][%d] %g\n",i,j,div); */
885:       THIErosion(thi, &x[i][j][0], &erate, NULL);
886:       f[i][j].b     = prmdot[i][j].b - erate;
887:       f[i][j].h     = prmdot[i][j].h + div;
888:       f[i][j].beta2 = prmdot[i][j].beta2;
889:     }
890:   }
891:   PetscFunctionReturn(PETSC_SUCCESS);
892: }

894: static PetscErrorCode THIFunction(TS ts, PetscReal t, Vec X, Vec Xdot, Vec F, void *ctx)
895: {
896:   THI             thi = (THI)ctx;
897:   DM              pack, da3, da2;
898:   Vec             X3, X2, Xdot3, Xdot2, F3, F2, F3g, F2g;
899:   const Node   ***x3, ***xdot3;
900:   const PrmNode **x2, **xdot2;
901:   Node         ***f3;
902:   PrmNode       **f2;
903:   DMDALocalInfo   info3;

905:   PetscFunctionBeginUser;
906:   PetscCall(TSGetDM(ts, &pack));
907:   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
908:   PetscCall(DMDAGetLocalInfo(da3, &info3));
909:   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
910:   PetscCall(DMCompositeGetLocalVectors(pack, &Xdot3, &Xdot2));
911:   PetscCall(DMCompositeScatter(pack, X, X3, X2));
912:   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
913:   PetscCall(DMCompositeScatter(pack, Xdot, Xdot3, Xdot2));

915:   PetscCall(DMGetLocalVector(da3, &F3));
916:   PetscCall(DMGetLocalVector(da2, &F2));
917:   PetscCall(VecZeroEntries(F3));

919:   PetscCall(DMDAVecGetArray(da3, X3, &x3));
920:   PetscCall(DMDAVecGetArray(da2, X2, &x2));
921:   PetscCall(DMDAVecGetArray(da3, Xdot3, &xdot3));
922:   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
923:   PetscCall(DMDAVecGetArray(da3, F3, &f3));
924:   PetscCall(DMDAVecGetArray(da2, F2, &f2));

926:   PetscCall(THIFunctionLocal_3D(&info3, x3, x2, xdot3, f3, thi));
927:   PetscCall(THIFunctionLocal_2D(&info3, x3, x2, xdot2, f2, thi));

929:   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
930:   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
931:   PetscCall(DMDAVecRestoreArray(da3, Xdot3, &xdot3));
932:   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
933:   PetscCall(DMDAVecRestoreArray(da3, F3, &f3));
934:   PetscCall(DMDAVecRestoreArray(da2, F2, &f2));

936:   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
937:   PetscCall(DMCompositeRestoreLocalVectors(pack, &Xdot3, &Xdot2));

939:   PetscCall(VecZeroEntries(F));
940:   PetscCall(DMCompositeGetAccess(pack, F, &F3g, &F2g));
941:   PetscCall(DMLocalToGlobalBegin(da3, F3, ADD_VALUES, F3g));
942:   PetscCall(DMLocalToGlobalEnd(da3, F3, ADD_VALUES, F3g));
943:   PetscCall(DMLocalToGlobalBegin(da2, F2, INSERT_VALUES, F2g));
944:   PetscCall(DMLocalToGlobalEnd(da2, F2, INSERT_VALUES, F2g));

946:   if (thi->verbose) {
947:     PetscViewer viewer;
948:     PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi), &viewer));
949:     PetscCall(PetscViewerASCIIPrintf(viewer, "3D_Velocity residual (bs=2):\n"));
950:     PetscCall(PetscViewerASCIIPushTab(viewer));
951:     PetscCall(VecView(F3, viewer));
952:     PetscCall(PetscViewerASCIIPopTab(viewer));
953:     PetscCall(PetscViewerASCIIPrintf(viewer, "2D_Fields residual (bs=3):\n"));
954:     PetscCall(PetscViewerASCIIPushTab(viewer));
955:     PetscCall(VecView(F2, viewer));
956:     PetscCall(PetscViewerASCIIPopTab(viewer));
957:   }

959:   PetscCall(DMCompositeRestoreAccess(pack, F, &F3g, &F2g));

961:   PetscCall(DMRestoreLocalVector(da3, &F3));
962:   PetscCall(DMRestoreLocalVector(da2, &F2));
963:   PetscFunctionReturn(PETSC_SUCCESS);
964: }

966: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
967: {
968:   PetscReal   nrm;
969:   PetscInt    m;
970:   PetscMPIInt rank;

972:   PetscFunctionBeginUser;
973:   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
974:   PetscCall(MatGetSize(B, &m, 0));
975:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
976:   if (rank == 0) {
977:     PetscScalar val0, val2;
978:     PetscCall(MatGetValue(B, 0, 0, &val0));
979:     PetscCall(MatGetValue(B, 2, 2, &val2));
980:     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), (double)thi->eta.cmin,
981:                                      (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
982:   }
983:   PetscFunctionReturn(PETSC_SUCCESS);
984: }

986: static PetscErrorCode THISurfaceStatistics(DM pack, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
987: {
988:   DM          da3, da2;
989:   Vec         X3, X2;
990:   Node     ***x;
991:   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
992:   PetscReal   umin = 1e100, umax = -1e100;
993:   PetscScalar usum = 0.0, gusum;

995:   PetscFunctionBeginUser;
996:   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
997:   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
998:   *min = *max = *mean = 0;
999:   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1000:   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
1001:   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Unexpected decomposition");
1002:   PetscCall(DMDAVecGetArray(da3, X3, &x));
1003:   for (i = xs; i < xs + xm; i++) {
1004:     for (j = ys; j < ys + ym; j++) {
1005:       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
1006:       RangeUpdate(&umin, &umax, u);
1007:       usum += u;
1008:     }
1009:   }
1010:   PetscCall(DMDAVecRestoreArray(da3, X3, &x));
1011:   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));

1013:   PetscCallMPI(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da3)));
1014:   PetscCallMPI(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da3)));
1015:   PetscCallMPI(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da3)));
1016:   *mean = PetscRealPart(gusum) / (mx * my);
1017:   PetscFunctionReturn(PETSC_SUCCESS);
1018: }

1020: static PetscErrorCode THISolveStatistics(THI thi, TS ts, PetscInt coarsened, const char name[])
1021: {
1022:   MPI_Comm comm;
1023:   DM       pack;
1024:   Vec      X, X3, X2;

1026:   PetscFunctionBeginUser;
1027:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1028:   PetscCall(TSGetDM(ts, &pack));
1029:   PetscCall(TSGetSolution(ts, &X));
1030:   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1031:   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
1032:   {
1033:     PetscInt            its, lits;
1034:     SNESConvergedReason reason;
1035:     SNES                snes;
1036:     PetscCall(TSGetSNES(ts, &snes));
1037:     PetscCall(SNESGetIterationNumber(snes, &its));
1038:     PetscCall(SNESGetConvergedReason(snes, &reason));
1039:     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
1040:     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
1041:   }
1042:   {
1043:     PetscReal    nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
1044:     PetscInt     i, j, m;
1045:     PetscScalar *x;
1046:     PetscCall(VecNorm(X3, NORM_2, &nrm2));
1047:     PetscCall(VecGetLocalSize(X3, &m));
1048:     PetscCall(VecGetArray(X3, &x));
1049:     for (i = 0; i < m; i += 2) {
1050:       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
1051:       tmin[0] = PetscMin(u, tmin[0]);
1052:       tmin[1] = PetscMin(v, tmin[1]);
1053:       tmin[2] = PetscMin(c, tmin[2]);
1054:       tmax[0] = PetscMax(u, tmax[0]);
1055:       tmax[1] = PetscMax(v, tmax[1]);
1056:       tmax[2] = PetscMax(c, tmax[2]);
1057:     }
1058:     PetscCall(VecRestoreArray(X3, &x));
1059:     PetscCallMPI(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
1060:     PetscCallMPI(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
1061:     /* Dimensionalize to meters/year */
1062:     nrm2 *= thi->units->year / thi->units->meter;
1063:     for (j = 0; j < 3; j++) {
1064:       min[j] *= thi->units->year / thi->units->meter;
1065:       max[j] *= thi->units->year / thi->units->meter;
1066:     }
1067:     PetscCall(PetscPrintf(comm, "|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
1068:     {
1069:       PetscReal umin, umax, umean;
1070:       PetscCall(THISurfaceStatistics(pack, X, &umin, &umax, &umean));
1071:       umin *= thi->units->year / thi->units->meter;
1072:       umax *= thi->units->year / thi->units->meter;
1073:       umean *= thi->units->year / thi->units->meter;
1074:       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
1075:     }
1076:     /* These values stay nondimensional */
1077:     PetscCall(PetscPrintf(comm, "Global eta range   [%g, %g], converged range [%g, %g]\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
1078:     PetscCall(PetscPrintf(comm, "Global beta2 range [%g, %g], converged range [%g, %g]\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
1079:   }
1080:   PetscCall(PetscPrintf(comm, "\n"));
1081:   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1082:   PetscFunctionReturn(PETSC_SUCCESS);
1083: }

1085: static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info, PetscInt i, PetscInt j, PetscInt k)
1086: {
1087:   return ((i - info->gzs) * info->gym + (j - info->gys)) * info->gxm + (k - info->gxs);
1088: }
1089: static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info, PetscInt i, PetscInt j)
1090: {
1091:   return (i - info->gzs) * info->gym + (j - info->gys);
1092: }
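/* The two helpers above convert grid indices to offsets in the ghosted local vectors, following
   the reversed X/Y/Z convention described at the top of the file (i runs over the DMDA's Z
   dimension, j over Y, and k over the contiguous columns in X). */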

1094: static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, Mat B, Mat Bcpl, THI thi)
1095: {
1096:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1097:   PetscReal hx, hy;

1099:   PetscFunctionBeginUser;
1100:   xs = info->zs;
1101:   ys = info->ys;
1102:   xm = info->zm;
1103:   ym = info->ym;
1104:   zm = info->xm;
1105:   hx = thi->Lx / info->mz;
1106:   hy = thi->Ly / info->my;

1108:   for (i = xs; i < xs + xm; i++) {
1109:     for (j = ys; j < ys + ym; j++) {
1110:       PrmNode pn[4], dpn[4][2];
1111:       QuadExtract(prm, i, j, pn);
1112:       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
1113:       for (k = 0; k < zm - 1; k++) {
1114:         Node        n[8];
1115:         PetscReal   zn[8], etabase = 0;
1116:         PetscScalar Ke[8 * NODE_SIZE][8 * NODE_SIZE], Kcpl[8 * NODE_SIZE][4 * PRMNODE_SIZE];
1117:         PetscInt    ls = 0;

1119:         PrmHexGetZ(pn, k, zm, zn);
1120:         HexExtract(x, i, j, k, n);
1121:         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1122:         PetscCall(PetscMemzero(Kcpl, sizeof(Kcpl)));
1123:         if (thi->no_slip && k == 0) {
1124:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1125:           ls = 4;
1126:         }
1127:         for (q = 0; q < 8; q++) {
1128:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1129:           PetscScalar du[3], dv[3], u, v;
1130:           HexGrad(HexQDeriv[q], zn, dz);
1131:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1132:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1133:           jw /= thi->rhog; /* residuals are scaled by this factor */
1134:           if (q == 0) etabase = eta;
1135:           for (l = ls; l < 8; l++) { /* test functions */
1136:             const PetscReal pp = phi[l], *restrict dp = dphi[l];
1137:             for (ll = ls; ll < 8; ll++) { /* trial functions */
1138:               const PetscReal *restrict dpl = dphi[ll];
1139:               PetscScalar dgdu, dgdv;
1140:               dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1141:               dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1142:               /* Picard part */
1143:               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1144:               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1145:               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1146:               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1147:               /* extra Newton terms from differentiating eta through gamma (chain-rule factors dgdu, dgdv above) */
1148:               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1149:               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1150:               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1151:               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1152:               /* inertial part */
1153:               Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * thi->inertia * pp;
1154:               Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * thi->inertia * pp;
1155:             }
1156:             for (ll = 0; ll < 4; ll++) {                                                              /* Trial functions for surface/bed */
1157:               const PetscReal dpl[] = {QuadQDeriv[q % 4][ll][0] / hx, QuadQDeriv[q % 4][ll][1] / hy}; /* surface = h + b */
1158:               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[0];
1159:               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[0];
1160:               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[1];
1161:               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[1];
1162:             }
1163:           }
1164:         }
1165:         if (k == 0) { /* on a bottom face */
1166:           if (thi->no_slip) {
1167:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1168:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1169:             Ke[0][0] = thi->dirichlet_scale * diagu;
1170:             Ke[0][1] = 0;
1171:             Ke[1][0] = 0;
1172:             Ke[1][1] = thi->dirichlet_scale * diagv;
1173:           } else {
1174:             for (q = 0; q < 4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1175:               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
1176:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1177:               PetscReal       beta2, dbeta2;
1178:               for (l = 0; l < 4; l++) {
1179:                 u += phi[l] * n[l].u;
1180:                 v += phi[l] * n[l].v;
1181:                 rbeta2 += phi[l] * pn[l].beta2;
1182:               }
1183:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1184:               for (l = 0; l < 4; l++) {
1185:                 const PetscReal pp = phi[l];
1186:                 for (ll = 0; ll < 4; ll++) {
1187:                   const PetscReal ppl = phi[ll];
1188:                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1189:                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1190:                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1191:                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1192:                 }
1193:               }
1194:             }
1195:           }
1196:         }
1197:         {
1198:           const PetscInt rc3blocked[8]                 = {DMDALocalIndex3D(info, i + 0, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 1, k + 0), DMDALocalIndex3D(info, i + 0, j + 1, k + 0),
1199:                                                           DMDALocalIndex3D(info, i + 0, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 1, k + 1), DMDALocalIndex3D(info, i + 0, j + 1, k + 1)},
1200:                          col2blocked[PRMNODE_SIZE * 4] = {DMDALocalIndex2D(info, i + 0, j + 0), DMDALocalIndex2D(info, i + 1, j + 0), DMDALocalIndex2D(info, i + 1, j + 1), DMDALocalIndex2D(info, i + 0, j + 1)};
1201: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in the lower-triangular part; this is cheap compared to computing the entries */
1202:           for (l = 0; l < 8; l++) {
1203:             for (ll = l + 1; ll < 8; ll++) {
1204:               Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1205:               Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1206:               Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1207:               Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1208:             }
1209:           }
1210: #endif
1211:           PetscCall(MatSetValuesBlockedLocal(B, 8, rc3blocked, 8, rc3blocked, &Ke[0][0], ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1212:           {                                                                                            /* The off-diagonal part cannot (yet) */
1213:             PetscInt row3scalar[NODE_SIZE * 8], col2scalar[PRMNODE_SIZE * 4];
1214:             for (l = 0; l < 8; l++)
1215:               for (ll = 0; ll < NODE_SIZE; ll++) row3scalar[l * NODE_SIZE + ll] = rc3blocked[l] * NODE_SIZE + ll;
1216:             for (l = 0; l < 4; l++)
1217:               for (ll = 0; ll < PRMNODE_SIZE; ll++) col2scalar[l * PRMNODE_SIZE + ll] = col2blocked[l] * PRMNODE_SIZE + ll;
1218:             PetscCall(MatSetValuesLocal(Bcpl, 8 * NODE_SIZE, row3scalar, 4 * PRMNODE_SIZE, col2scalar, &Kcpl[0][0], ADD_VALUES));
1219:           }
1220:         }
1221:       }
1222:     }
1223:   }
1224:   PetscFunctionReturn(PETSC_SUCCESS);
1225: }
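/* When modifying the assembly above, the hand-coded Jacobian can be compared against a finite-difference
 * approximation at runtime.  A usage sketch, assuming a recent PETSc where these standard SNES options are
 * available (the inner SNES created by TS has no prefix here):
 *
 *   ./ex14 -snes_test_jacobian                           # report the difference to an FD Jacobian
 *   ./ex14 -snes_test_jacobian -snes_test_jacobian_view  # also view where the entries differ
 */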

1227: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, const Node ***x3, const PrmNode **x2, const PrmNode **xdot2, PetscReal a, Mat B22, Mat B21, THI thi)
1228: {
1229:   PetscInt xs, ys, xm, ym, zm, i, j, k;

1231:   PetscFunctionBeginUser;
1232:   xs = info->zs;
1233:   ys = info->ys;
1234:   xm = info->zm;
1235:   ym = info->ym;
1236:   zm = info->xm;

1238:   PetscCheck(zm <= 1024, ((PetscObject)info->da)->comm, PETSC_ERR_SUP, "Need to allocate more space");
1239:   for (i = xs; i < xs + xm; i++) {
1240:     for (j = ys; j < ys + ym; j++) {
1241:       { /* Self-coupling */
1242:         const PetscInt    row[]  = {DMDALocalIndex2D(info, i, j)};
1243:         const PetscInt    col[]  = {DMDALocalIndex2D(info, i, j)};
1244:         const PetscScalar vals[] = {a, 0, 0, 0, a, 0, 0, 0, a};
1245:         PetscCall(MatSetValuesBlockedLocal(B22, 1, row, 1, col, vals, INSERT_VALUES));
1246:       }
1247:       for (k = 0; k < zm; k++) { /* Coupling to velocity problem */
1248:         /* Use a cheaper quadrature than for residual evaluation because this coupling block is much sparser */
1249:         const PetscInt    row[]  = {FieldIndex(PrmNode, DMDALocalIndex2D(info, i, j), h)};
1250:         const PetscInt    cols[] = {FieldIndex(Node, DMDALocalIndex3D(info, i - 1, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i + 1, j, k), u),
1251:                                     FieldIndex(Node, DMDALocalIndex3D(info, i, j - 1, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j + 1, k), v)};
1252:         const PetscScalar w = (k && k < zm - 1) ? 0.5 : 0.25, hW = w * (x2[i - 1][j].h + x2[i][j].h) / (zm - 1.), hE = w * (x2[i][j].h + x2[i + 1][j].h) / (zm - 1.), hS = w * (x2[i][j - 1].h + x2[i][j].h) / (zm - 1.),
1253:                           hN = w * (x2[i][j].h + x2[i][j + 1].h) / (zm - 1.);
1254:         PetscScalar *vals, vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0), (PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW, (PetscRealPart(x3[i][j][k].u) > 0) ? 0 : +hE,
1255:                                             (PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0,   (PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS, ((PetscRealPart(x3[i][j][k].v) > 0) ? 0 : +hN)},
1256:                            vals_centered[] = {-0.5 * hW, 0.5 * (-hW + hE), 0.5 * hE, -0.5 * hS, 0.5 * (-hS + hN), 0.5 * hN};
1257:         vals = 1 ? vals_upwind : vals_centered; /* always use the upwind stencil; change the constant to experiment with centered differences */
1258:         if (k == 0) {
1259:           Node derate;
1260:           THIErosion(thi, &x3[i][j][0], NULL, &derate);
1261:           vals[1] -= derate.u;
1262:           vals[4] -= derate.v;
1263:         }
1264:         PetscCall(MatSetValuesLocal(B21, 1, row, 6, cols, vals, INSERT_VALUES));
1265:       }
1266:     }
1267:   }
1268:   PetscFunctionReturn(PETSC_SUCCESS);
1269: }
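/* The coupling rows above upwind the thickness advection: the sign of the local velocity decides whether
 * the west or east (respectively south or north) neighbor contributes.  A 1D sketch of the same choice,
 * mirroring the first three entries of vals_upwind above (the y-direction part is analogous with hS, hN);
 * UpwindRow1D is a hypothetical illustration, not used by the example: */
#if 0
static void UpwindRow1D(PetscReal hW, PetscReal hE, PetscReal u, PetscScalar vals[3])
{
  /* vals[0], vals[1], vals[2] couple to u at the west neighbor, this node, and the east neighbor */
  if (u > 0) { /* flow from the west: the west neighbor and this node carry the nonzero entries */
    vals[0] = -hW;
    vals[1] = +hE;
    vals[2] = 0;
  } else { /* flow from the east: this node and the east neighbor carry the nonzero entries */
    vals[0] = 0;
    vals[1] = -hW;
    vals[2] = +hE;
  }
}
#endif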

1271: static PetscErrorCode THIJacobian(TS ts, PetscReal t, Vec X, Vec Xdot, PetscReal a, Mat A, Mat B, void *ctx)
1272: {
1273:   THI             thi = (THI)ctx;
1274:   DM              pack, da3, da2;
1275:   Vec             X3, X2, Xdot2;
1276:   Mat             B11, B12, B21, B22;
1277:   DMDALocalInfo   info3;
1278:   IS             *isloc;
1279:   const Node   ***x3;
1280:   const PrmNode **x2, **xdot2;

1282:   PetscFunctionBeginUser;
1283:   PetscCall(TSGetDM(ts, &pack));
1284:   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1285:   PetscCall(DMDAGetLocalInfo(da3, &info3));
1286:   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
1287:   PetscCall(DMCompositeGetLocalVectors(pack, NULL, &Xdot2));
1288:   PetscCall(DMCompositeScatter(pack, X, X3, X2));
1289:   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
1290:   PetscCall(DMCompositeScatter(pack, Xdot, NULL, Xdot2));

1292:   PetscCall(MatZeroEntries(B));

1294:   PetscCall(DMCompositeGetLocalISs(pack, &isloc));
1295:   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1296:   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1297:   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1298:   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[1], &B22));

1300:   PetscCall(DMDAVecGetArray(da3, X3, &x3));
1301:   PetscCall(DMDAVecGetArray(da2, X2, &x2));
1302:   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));

1304:   PetscCall(THIJacobianLocal_Momentum(&info3, x3, x2, B11, B12, thi));

1306:   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1307:   PetscCall(MatAssemblyBegin(B, MAT_FLUSH_ASSEMBLY));
1308:   PetscCall(MatAssemblyEnd(B, MAT_FLUSH_ASSEMBLY));

1310:   PetscCall(THIJacobianLocal_2D(&info3, x3, x2, xdot2, a, B22, B21, thi));

1312:   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
1313:   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
1314:   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));

1316:   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1317:   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1318:   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1319:   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1320:   PetscCall(ISDestroy(&isloc[0]));
1321:   PetscCall(ISDestroy(&isloc[1]));
1322:   PetscCall(PetscFree(isloc));

1324:   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
1325:   PetscCall(DMCompositeRestoreLocalVectors(pack, NULL, &Xdot2));

1327:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1328:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1329:   if (A != B) {
1330:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1331:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1332:   }
1333:   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1334:   PetscFunctionReturn(PETSC_SUCCESS);
1335: }
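/* The MAT_FLUSH_ASSEMBLY above is needed because PETSc matrices may not mix ADD_VALUES and INSERT_VALUES
 * without an intervening flush.  A minimal sketch of the rule on a generic Mat; FlushAssemblySketch is
 * illustrative only and not part of the example: */
#if 0
static PetscErrorCode FlushAssemblySketch(Mat A)
{
  PetscFunctionBeginUser;
  PetscCall(MatSetValue(A, 0, 0, 1.0, ADD_VALUES));
  /* switching insert modes requires a flush assembly in between ... */
  PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
  PetscCall(MatSetValue(A, 1, 1, 2.0, INSERT_VALUES));
  /* ... and the matrix must still be finally assembled before use */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif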

1337: /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1338:  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1339:  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1340:  */
1341: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM pack, Vec X, const char filename[], const char filename2[])
1342: {
1343:   const PetscInt dof = NODE_SIZE, dof2 = PRMNODE_SIZE;
1344:   Units          units = thi->units;
1345:   MPI_Comm       comm;
1346:   PetscViewer    viewer3, viewer2;
1347:   PetscMPIInt    rank, size, tag, nn, nmax, nn2, nmax2;
1348:   PetscInt       mx, my, mz, r, range[6];
1349:   PetscScalar   *x, *x2;
1350:   DM             da3, da2;
1351:   Vec            X3, X2;

1353:   PetscFunctionBeginUser;
1354:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1355:   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1356:   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1357:   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1358:   PetscCallMPI(MPI_Comm_size(comm, &size));
1359:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1360:   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer3));
1361:   PetscCall(PetscViewerASCIIOpen(comm, filename2, &viewer2));
1362:   PetscCall(PetscViewerASCIIPrintf(viewer3, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1363:   PetscCall(PetscViewerASCIIPrintf(viewer2, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1364:   PetscCall(PetscViewerASCIIPrintf(viewer3, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1365:   PetscCall(PetscViewerASCIIPrintf(viewer2, "  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, 0, 0, my - 1, 0, mx - 1));

1367:   PetscCall(DMDAGetCorners(da3, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1368:   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1369:   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1370:   PetscCall(PetscMPIIntCast(range[4] * range[5] * dof2, &nn2));
1371:   PetscCallMPI(MPI_Reduce(&nn2, &nmax2, 1, MPI_INT, MPI_MAX, 0, comm));
1372:   tag = ((PetscObject)viewer3)->tag;
1373:   PetscCall(VecGetArrayRead(X3, (const PetscScalar **)&x));
1374:   PetscCall(VecGetArrayRead(X2, (const PetscScalar **)&x2));
1375:   if (rank == 0) {
1376:     PetscScalar *array, *array2;
1377:     PetscCall(PetscMalloc2(nmax, &array, nmax2, &array2));
1378:     for (r = 0; r < size; r++) {
1379:       PetscInt i, j, k, f, xs, xm, ys, ym, zs, zm;
1380:       Node    *y3;
1381:       PetscScalar(*y2)[PRMNODE_SIZE];
1382:       MPI_Status status;

1384:       if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1385:       zs = range[0];
1386:       ys = range[1];
1387:       xs = range[2];
1388:       zm = range[3];
1389:       ym = range[4];
1390:       xm = range[5];
1391:       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1392:       if (r) {
1393:         PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1394:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1395:         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da3 send");
1396:         y3 = (Node *)array;
1397:         PetscCallMPI(MPI_Recv(array2, nmax2, MPIU_SCALAR, r, tag, comm, &status));
1398:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn2));
1399:         PetscCheck(nn2 == xm * ym * dof2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da2 send");
1400:         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1401:       } else {
1402:         y3 = (Node *)x;
1403:         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1404:       }
1405:       PetscCall(PetscViewerASCIIPrintf(viewer3, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1406:       PetscCall(PetscViewerASCIIPrintf(viewer2, "    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", 0, 0, ys, ys + ym - 1, xs, xs + xm - 1));

1408:       PetscCall(PetscViewerASCIIPrintf(viewer3, "      <Points>\n"));
1409:       PetscCall(PetscViewerASCIIPrintf(viewer2, "      <Points>\n"));
1410:       PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1411:       PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1412:       for (i = xs; i < xs + xm; i++) {
1413:         for (j = ys; j < ys + ym; j++) {
1414:           PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, b = PetscRealPart(y2[(i - xs) * ym + (j - ys)][FieldOffset(PrmNode, b)]), h = PetscRealPart(y2[(i - xs) * ym + (j - ys)][FieldOffset(PrmNode, h)]);
1415:           for (k = zs; k < zs + zm; k++) {
1416:             PetscReal zz = b + h * k / (mz - 1.);
1417:             PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1418:           }
1419:           PetscCall(PetscViewerASCIIPrintf(viewer2, "%f %f %f\n", (double)xx, (double)yy, (double)0.0));
1420:         }
1421:       }
1422:       PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1423:       PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1424:       PetscCall(PetscViewerASCIIPrintf(viewer3, "      </Points>\n"));
1425:       PetscCall(PetscViewerASCIIPrintf(viewer2, "      </Points>\n"));

1427:       { /* Velocity and rank (3D) */
1428:         PetscCall(PetscViewerASCIIPrintf(viewer3, "      <PointData>\n"));
1429:         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1430:         for (i = 0; i < nn / dof; i++) PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)(PetscRealPart(y3[i].u) * units->year / units->meter), (double)(PetscRealPart(y3[i].v) * units->year / units->meter), 0.0));
1431:         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));

1433:         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1434:         for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer3, "%" PetscInt_FMT "\n", r));
1435:         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1436:         PetscCall(PetscViewerASCIIPrintf(viewer3, "      </PointData>\n"));
1437:       }

1439:       { /* 2D */
1440:         PetscCall(PetscViewerASCIIPrintf(viewer2, "      <PointData>\n"));
1441:         for (f = 0; f < PRMNODE_SIZE; f++) {
1442:           const char *fieldname;
1443:           PetscCall(DMDAGetFieldName(da2, f, &fieldname));
1444:           PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n", fieldname));
1445:           for (i = 0; i < nn2 / PRMNODE_SIZE; i++) PetscCall(PetscViewerASCIIPrintf(viewer2, "%g\n", (double)PetscRealPart(y2[i][f])));
1446:           PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1447:         }
1448:         PetscCall(PetscViewerASCIIPrintf(viewer2, "      </PointData>\n"));
1449:       }

1451:       PetscCall(PetscViewerASCIIPrintf(viewer3, "    </Piece>\n"));
1452:       PetscCall(PetscViewerASCIIPrintf(viewer2, "    </Piece>\n"));
1453:     }
1454:     PetscCall(PetscFree2(array, array2));
1455:   } else {
1456:     PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm));
1457:     PetscCallMPI(MPI_Send(x, nn, MPIU_SCALAR, 0, tag, comm));
1458:     PetscCallMPI(MPI_Send(x2, nn2, MPIU_SCALAR, 0, tag, comm));
1459:   }
1460:   PetscCall(VecRestoreArrayRead(X3, (const PetscScalar **)&x));
1461:   PetscCall(VecRestoreArrayRead(X2, (const PetscScalar **)&x2));
1462:   PetscCall(PetscViewerASCIIPrintf(viewer3, "  </StructuredGrid>\n"));
1463:   PetscCall(PetscViewerASCIIPrintf(viewer2, "  </StructuredGrid>\n"));

1465:   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1466:   PetscCall(PetscViewerASCIIPrintf(viewer3, "</VTKFile>\n"));
1467:   PetscCall(PetscViewerASCIIPrintf(viewer2, "</VTKFile>\n"));
1468:   PetscCall(PetscViewerDestroy(&viewer3));
1469:   PetscCall(PetscViewerDestroy(&viewer2));
1470:   PetscFunctionReturn(PETSC_SUCCESS);
1471: }
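/* The writer above funnels every rank's subdomain through rank 0 and uses MPI_Get_count to verify that
 * each received piece has the expected length.  A generic sketch of that receive-and-check pattern;
 * RecvChecked is a hypothetical helper, not used by the example: */
#if 0
static PetscErrorCode RecvChecked(MPI_Comm comm, PetscMPIInt src, PetscMPIInt tag, PetscScalar *buf, PetscMPIInt maxlen, PetscMPIInt expected)
{
  MPI_Status  status;
  PetscMPIInt count;

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Recv(buf, maxlen, MPIU_SCALAR, src, tag, comm, &status));
  PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &count));
  PetscCheck(count == expected, PETSC_COMM_SELF, PETSC_ERR_PLIB, "received %d scalars, expected %d", count, expected);
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif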

1473: static PetscErrorCode THITSMonitor(TS ts, PetscInt step, PetscReal t, Vec X, void *ctx)
1474: {
1475:   THI  thi = (THI)ctx;
1476:   DM   pack;
1477:   char filename3[PETSC_MAX_PATH_LEN], filename2[PETSC_MAX_PATH_LEN];

1479:   PetscFunctionBeginUser;
1480:   if (step < 0) PetscFunctionReturn(PETSC_SUCCESS); /* negative one is used to indicate an interpolated solution */
1481:   PetscCall(PetscPrintf(PetscObjectComm((PetscObject)ts), "%3" PetscInt_FMT ": t=%g\n", step, (double)t));
1482:   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(PETSC_SUCCESS);
1483:   PetscCall(TSGetDM(ts, &pack));
1484:   PetscCall(PetscSNPrintf(filename3, sizeof(filename3), "%s-3d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1485:   PetscCall(PetscSNPrintf(filename2, sizeof(filename2), "%s-2d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1486:   PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename3, filename2));
1487:   PetscFunctionReturn(PETSC_SUCCESS);
1488: }

1490: static PetscErrorCode THICreateDM3d(THI thi, DM *dm3d)
1491: {
1492:   MPI_Comm comm;
1493:   PetscInt M = 3, N = 3, P = 2;
1494:   DM       da;

1496:   PetscFunctionBeginUser;
1497:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1498:   PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1499:   {
1500:     PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1501:     N = M;
1502:     PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1503:     PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1504:   }
1505:   PetscOptionsEnd();
1506:   PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1507:   PetscCall(DMSetFromOptions(da));
1508:   PetscCall(DMSetUp(da));
1509:   PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1510:   PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1511:   *dm3d = da;
1512:   PetscFunctionReturn(PETSC_SUCCESS);
1513: }
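/* The coarse grid has M x N x P elements; finer grids come from DMDA refinement handled by
 * DMSetFromOptions().  A usage sketch (option values are illustrative, not defaults of this example):
 *
 *   ./ex14 -M 6 -P 4               # 6 x 6 x 4 element coarse grid (N defaults to M)
 *   ./ex14 -M 6 -P 4 -da_refine 2  # refine that coarse grid twice
 */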

1515: int main(int argc, char *argv[])
1516: {
1517:   MPI_Comm  comm;
1518:   DM        pack, da3, da2;
1519:   TS        ts;
1520:   THI       thi;
1521:   Vec       X;
1522:   Mat       B;
1523:   PetscInt  i, steps;
1524:   PetscReal ftime;

1526:   PetscFunctionBeginUser;
1527:   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1528:   comm = PETSC_COMM_WORLD;

1530:   PetscCall(THICreate(comm, &thi));
1531:   PetscCall(THICreateDM3d(thi, &da3));
1532:   {
1533:     PetscInt        Mx, My, mx, my, s;
1534:     DMDAStencilType st;
1535:     PetscCall(DMDAGetInfo(da3, 0, 0, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
1536:     PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2));
1537:     PetscCall(DMSetUp(da2));
1538:   }

1540:   PetscCall(PetscObjectSetName((PetscObject)da3, "3D_Velocity"));
1541:   PetscCall(DMSetOptionsPrefix(da3, "f3d_"));
1542:   PetscCall(DMDASetFieldName(da3, 0, "u"));
1543:   PetscCall(DMDASetFieldName(da3, 1, "v"));
1544:   PetscCall(PetscObjectSetName((PetscObject)da2, "2D_Fields"));
1545:   PetscCall(DMSetOptionsPrefix(da2, "f2d_"));
1546:   PetscCall(DMDASetFieldName(da2, 0, "b"));
1547:   PetscCall(DMDASetFieldName(da2, 1, "h"));
1548:   PetscCall(DMDASetFieldName(da2, 2, "beta2"));
1549:   PetscCall(DMCompositeCreate(comm, &pack));
1550:   PetscCall(DMCompositeAddDM(pack, da3));
1551:   PetscCall(DMCompositeAddDM(pack, da2));
1552:   PetscCall(DMDestroy(&da3));
1553:   PetscCall(DMDestroy(&da2));
1554:   PetscCall(DMSetUp(pack));
1555:   PetscCall(DMCreateMatrix(pack, &B));
1556:   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1557:   PetscCall(MatSetOptionsPrefix(B, "thi_"));

1559:   for (i = 0; i < thi->nlevels; i++) {
1560:     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
1561:     PetscInt  Mx, My, Mz;
1562:     PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1563:     PetscCall(DMDAGetInfo(da3, 0, &Mz, &My, &Mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1564:     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3" PetscInt_FMT " x %3" PetscInt_FMT " x %3" PetscInt_FMT " (%8" PetscInt_FMT "), size (m) %g x %g x %g\n", i, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), 1000. / (Mz - 1)));
1565:   }

1567:   PetscCall(DMCreateGlobalVector(pack, &X));
1568:   PetscCall(THIInitial(thi, pack, X));

1570:   PetscCall(TSCreate(comm, &ts));
1571:   PetscCall(TSSetDM(ts, pack));
1572:   PetscCall(TSSetProblemType(ts, TS_NONLINEAR));
1573:   PetscCall(TSMonitorSet(ts, THITSMonitor, thi, NULL));
1574:   PetscCall(TSSetType(ts, TSTHETA));
1575:   PetscCall(TSSetIFunction(ts, NULL, THIFunction, thi));
1576:   PetscCall(TSSetIJacobian(ts, B, B, THIJacobian, thi));
1577:   PetscCall(TSSetMaxTime(ts, 10.0));
1578:   PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
1579:   PetscCall(TSSetSolution(ts, X));
1580:   PetscCall(TSSetTimeStep(ts, 1e-3));
1581:   PetscCall(TSSetFromOptions(ts));

1583:   PetscCall(TSSolve(ts, X));
1584:   PetscCall(TSGetSolveTime(ts, &ftime));
1585:   PetscCall(TSGetStepNumber(ts, &steps));
1586:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Steps %" PetscInt_FMT "  final time %g\n", steps, (double)ftime));

1588:   if (0) PetscCall(THISolveStatistics(thi, ts, 0, "Full"));

1590:   {
1591:     PetscBool flg;
1592:     char      filename[PETSC_MAX_PATH_LEN] = "";
1593:     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1594:     if (flg) PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename, NULL));
1595:   }

1597:   PetscCall(VecDestroy(&X));
1598:   PetscCall(MatDestroy(&B));
1599:   PetscCall(DMDestroy(&pack));
1600:   PetscCall(TSDestroy(&ts));
1601:   PetscCall(THIDestroy(&thi));
1602:   PetscCall(PetscFinalize());
1603:   return 0;
1604: }
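/* Example invocation (illustrative only; all options shown are either standard PETSc/TS options or
 * options registered by this example):
 *
 *   mpiexec -n 2 ./ex14 -M 6 -P 4 -ts_max_steps 2 -ts_monitor -o ex14.vts
 */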