Actual source code: ex48.c

  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: There are two compile-time options:

 46:   NO_SSE2:
 47:     If the host supports SSE2, we use integration code that has been vectorized with SSE2
 48:     intrinsics, unless this macro is defined.  The intrinsics speed up integration by about
 49:     30% on my architecture (P8700, gcc-4.5 snapshot).

 51:   COMPUTE_LOWER_TRIANGULAR:
 52:     The element matrices we assemble are lower-triangular so it is not necessary to compute
 53:     all entries explicitly.  If this macro is defined, the lower-triangular entries are
 54:     computed explicitly.

 56: */

 58: #if defined(PETSC_APPLE_FRAMEWORK)
 59:   #import <PETSc/petscsnes.h>
 60:   #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
 61: #else

 63: #include <petscsnes.h>
 64: #include <petsc/private/dmdaimpl.h>
 65: #endif
 66: #include <ctype.h> /* toupper() */

 68: #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI)
 69:   /*  c++ cannot handle  [_restrict_] notation like C does */
 70:   #undef PETSC_RESTRICT
 71:   #define PETSC_RESTRICT
 72: #endif

 74: #if defined(__SSE2__)
 75:   #include <emmintrin.h>
 76: #endif

 78: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 79: #if !defined(NO_SSE2) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_REAL_SINGLE) && !defined(PETSC_USE_REAL___FLOAT128) && !defined(PETSC_USE_REAL___FP16) && defined(__SSE2__)
 80:   #define USE_SSE2_KERNELS 1
 81: #else
 82:   #define USE_SSE2_KERNELS 0
 83: #endif

 85: static PetscClassId THI_CLASSID;

 87: typedef enum {
 88:   QUAD_GAUSS,
 89:   QUAD_LOBATTO
 90: } QuadratureType;
 91: static const char                  *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
 92: PETSC_UNUSED static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
 93: PETSC_UNUSED static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
 94: #define G 0.57735026918962573
 95: #define H (0.5 * (1. + G))
 96: #define L (0.5 * (1. - G))
 97: #define M (-0.5)
 98: #define P (0.5)
 99: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
100: static const PetscReal HexQInterp_Lobatto[8][8] = {
101:   {H, 0, 0, 0, L, 0, 0, 0},
102:   {0, H, 0, 0, 0, L, 0, 0},
103:   {0, 0, H, 0, 0, 0, L, 0},
104:   {0, 0, 0, H, 0, 0, 0, L},
105:   {L, 0, 0, 0, H, 0, 0, 0},
106:   {0, L, 0, 0, 0, H, 0, 0},
107:   {0, 0, L, 0, 0, 0, H, 0},
108:   {0, 0, 0, L, 0, 0, 0, H}
109: };
110: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111:   {{M * H, M * H, M}, {P * H, 0, 0},     {0, 0, 0},         {0, P * H, 0},     {M * L, M * L, P}, {P * L, 0, 0},     {0, 0, 0},         {0, P * L, 0}    },
112:   {{M * H, 0, 0},     {P * H, M * H, M}, {0, P * H, 0},     {0, 0, 0},         {M * L, 0, 0},     {P * L, M * L, P}, {0, P * L, 0},     {0, 0, 0}        },
113:   {{0, 0, 0},         {0, M * H, 0},     {P * H, P * H, M}, {M * H, 0, 0},     {0, 0, 0},         {0, M * L, 0},     {P * L, P * L, P}, {M * L, 0, 0}    },
114:   {{0, M * H, 0},     {0, 0, 0},         {P * H, 0, 0},     {M * H, P * H, M}, {0, M * L, 0},     {0, 0, 0},         {P * L, 0, 0},     {M * L, P * L, P}},
115:   {{M * L, M * L, M}, {P * L, 0, 0},     {0, 0, 0},         {0, P * L, 0},     {M * H, M * H, P}, {P * H, 0, 0},     {0, 0, 0},         {0, P * H, 0}    },
116:   {{M * L, 0, 0},     {P * L, M * L, M}, {0, P * L, 0},     {0, 0, 0},         {M * H, 0, 0},     {P * H, M * H, P}, {0, P * H, 0},     {0, 0, 0}        },
117:   {{0, 0, 0},         {0, M * L, 0},     {P * L, P * L, M}, {M * L, 0, 0},     {0, 0, 0},         {0, M * H, 0},     {P * H, P * H, P}, {M * H, 0, 0}    },
118:   {{0, M * L, 0},     {0, 0, 0},         {P * L, 0, 0},     {M * L, P * L, M}, {0, M * H, 0},     {0, 0, 0},         {P * H, 0, 0},     {M * H, P * H, P}}
119: };
120: /* Standard Gauss */
121: static const PetscReal HexQInterp_Gauss[8][8] = {
122:   {H * H * H, L * H * H, L * L * H, H * L * H, H * H * L, L * H * L, L * L * L, H * L * L},
123:   {L * H * H, H * H * H, H * L * H, L * L * H, L * H * L, H * H * L, H * L * L, L * L * L},
124:   {L * L * H, H * L * H, H * H * H, L * H * H, L * L * L, H * L * L, H * H * L, L * H * L},
125:   {H * L * H, L * L * H, L * H * H, H * H * H, H * L * L, L * L * L, L * H * L, H * H * L},
126:   {H * H * L, L * H * L, L * L * L, H * L * L, H * H * H, L * H * H, L * L * H, H * L * H},
127:   {L * H * L, H * H * L, H * L * L, L * L * L, L * H * H, H * H * H, H * L * H, L * L * H},
128:   {L * L * L, H * L * L, H * H * L, L * H * L, L * L * H, H * L * H, H * H * H, L * H * H},
129:   {H * L * L, L * L * L, L * H * L, H * H * L, H * L * H, L * L * H, L * H * H, H * H * H}
130: };
131: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
132:   {{M * H * H, H * M * H, H * H * M}, {P * H * H, L * M * H, L * H * M}, {P * L * H, L * P * H, L * L * M}, {M * L * H, H * P * H, H * L * M}, {M * H * L, H * M * L, H * H * P}, {P * H * L, L * M * L, L * H * P}, {P * L * L, L * P * L, L * L * P}, {M * L * L, H * P * L, H * L * P}},
133:   {{M * H * H, L * M * H, L * H * M}, {P * H * H, H * M * H, H * H * M}, {P * L * H, H * P * H, H * L * M}, {M * L * H, L * P * H, L * L * M}, {M * H * L, L * M * L, L * H * P}, {P * H * L, H * M * L, H * H * P}, {P * L * L, H * P * L, H * L * P}, {M * L * L, L * P * L, L * L * P}},
134:   {{M * L * H, L * M * H, L * L * M}, {P * L * H, H * M * H, H * L * M}, {P * H * H, H * P * H, H * H * M}, {M * H * H, L * P * H, L * H * M}, {M * L * L, L * M * L, L * L * P}, {P * L * L, H * M * L, H * L * P}, {P * H * L, H * P * L, H * H * P}, {M * H * L, L * P * L, L * H * P}},
135:   {{M * L * H, H * M * H, H * L * M}, {P * L * H, L * M * H, L * L * M}, {P * H * H, L * P * H, L * H * M}, {M * H * H, H * P * H, H * H * M}, {M * L * L, H * M * L, H * L * P}, {P * L * L, L * M * L, L * L * P}, {P * H * L, L * P * L, L * H * P}, {M * H * L, H * P * L, H * H * P}},
136:   {{M * H * L, H * M * L, H * H * M}, {P * H * L, L * M * L, L * H * M}, {P * L * L, L * P * L, L * L * M}, {M * L * L, H * P * L, H * L * M}, {M * H * H, H * M * H, H * H * P}, {P * H * H, L * M * H, L * H * P}, {P * L * H, L * P * H, L * L * P}, {M * L * H, H * P * H, H * L * P}},
137:   {{M * H * L, L * M * L, L * H * M}, {P * H * L, H * M * L, H * H * M}, {P * L * L, H * P * L, H * L * M}, {M * L * L, L * P * L, L * L * M}, {M * H * H, L * M * H, L * H * P}, {P * H * H, H * M * H, H * H * P}, {P * L * H, H * P * H, H * L * P}, {M * L * H, L * P * H, L * L * P}},
138:   {{M * L * L, L * M * L, L * L * M}, {P * L * L, H * M * L, H * L * M}, {P * H * L, H * P * L, H * H * M}, {M * H * L, L * P * L, L * H * M}, {M * L * H, L * M * H, L * L * P}, {P * L * H, H * M * H, H * L * P}, {P * H * H, H * P * H, H * H * P}, {M * H * H, L * P * H, L * H * P}},
139:   {{M * L * L, H * M * L, H * L * M}, {P * L * L, L * M * L, L * L * M}, {P * H * L, L * P * L, L * H * M}, {M * H * L, H * P * L, H * H * M}, {M * L * H, H * M * H, H * L * P}, {P * L * H, L * M * H, L * L * P}, {P * H * H, L * P * H, L * H * P}, {M * H * H, H * P * H, H * H * P}}
140: };
141: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
142: /* Standard 2x2 Gauss quadrature for the bottom layer. */
143: static const PetscReal QuadQInterp[4][4] = {
144:   {H * H, L * H, L * L, H * L},
145:   {L * H, H * H, H * L, L * L},
146:   {L * L, H * L, H * H, L * H},
147:   {H * L, L * L, L * H, H * H}
148: };
149: static const PetscReal QuadQDeriv[4][4][2] = {
150:   {{M * H, M * H}, {P * H, M * L}, {P * L, P * L}, {M * L, P * H}},
151:   {{M * H, M * L}, {P * H, M * H}, {P * L, P * H}, {M * L, P * L}},
152:   {{M * L, M * L}, {P * L, M * H}, {P * H, P * H}, {M * H, P * L}},
153:   {{M * L, M * H}, {P * L, M * L}, {P * H, P * L}, {M * H, P * H}}
154: };
155: #undef G
156: #undef H
157: #undef L
158: #undef M
159: #undef P

161: #define HexExtract(x, i, j, k, n) \
162:   do { \
163:     (n)[0] = (x)[i][j][k]; \
164:     (n)[1] = (x)[i + 1][j][k]; \
165:     (n)[2] = (x)[i + 1][j + 1][k]; \
166:     (n)[3] = (x)[i][j + 1][k]; \
167:     (n)[4] = (x)[i][j][k + 1]; \
168:     (n)[5] = (x)[i + 1][j][k + 1]; \
169:     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
170:     (n)[7] = (x)[i][j + 1][k + 1]; \
171:   } while (0)

173: #define HexExtractRef(x, i, j, k, n) \
174:   do { \
175:     (n)[0] = &(x)[i][j][k]; \
176:     (n)[1] = &(x)[i + 1][j][k]; \
177:     (n)[2] = &(x)[i + 1][j + 1][k]; \
178:     (n)[3] = &(x)[i][j + 1][k]; \
179:     (n)[4] = &(x)[i][j][k + 1]; \
180:     (n)[5] = &(x)[i + 1][j][k + 1]; \
181:     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
182:     (n)[7] = &(x)[i][j + 1][k + 1]; \
183:   } while (0)

185: #define QuadExtract(x, i, j, n) \
186:   do { \
187:     (n)[0] = (x)[i][j]; \
188:     (n)[1] = (x)[i + 1][j]; \
189:     (n)[2] = (x)[i + 1][j + 1]; \
190:     (n)[3] = (x)[i][j + 1]; \
191:   } while (0)

193: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
194: {
195:   PetscInt i;
196:   dz[0] = dz[1] = dz[2] = 0;
197:   for (i = 0; i < 8; i++) {
198:     dz[0] += dphi[i][0] * zn[i];
199:     dz[1] += dphi[i][1] * zn[i];
200:     dz[2] += dphi[i][2] * zn[i];
201:   }
202: }

204: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw)
205: {
206:   const PetscReal jac[3][3] = {
207:     {hx / 2, 0,      0    },
208:     {0,      hy / 2, 0    },
209:     {dz[0],  dz[1],  dz[2]}
210:   };
211:   const PetscReal ijac[3][3] = {
212:     {1 / jac[0][0],                        0,                                    0            },
213:     {0,                                    1 / jac[1][1],                        0            },
214:     {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}
215:   };
216:   const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2];
217:   PetscInt        i;

219:   for (i = 0; i < 8; i++) {
220:     const PetscReal *dphir = HexQDeriv[q][i];
221:     phi[i]                 = HexQInterp[q][i];
222:     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
223:     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
224:     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
225:   }
226:   *jw = 1.0 * jdet;
227: }

229: typedef struct _p_THI   *THI;
230: typedef struct _n_Units *Units;

232: typedef struct {
233:   PetscScalar u, v;
234: } Node;

236: typedef struct {
237:   PetscScalar b;     /* bed */
238:   PetscScalar h;     /* thickness */
239:   PetscScalar beta2; /* friction */
240: } PrmNode;

242: typedef struct {
243:   PetscReal min, max, cmin, cmax;
244: } PRange;

246: typedef enum {
247:   THIASSEMBLY_TRIDIAGONAL,
248:   THIASSEMBLY_FULL
249: } THIAssemblyMode;

251: struct _p_THI {
252:   PETSCHEADER(int);
253:   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
254:   PetscInt  zlevels;
255:   PetscReal Lx, Ly, Lz; /* Model domain */
256:   PetscReal alpha;      /* Bed angle */
257:   Units     units;
258:   PetscReal dirichlet_scale;
259:   PetscReal ssa_friction_scale;
260:   PRange    eta;
261:   PRange    beta2;
262:   struct {
263:     PetscReal Bd2, eps, exponent;
264:   } viscosity;
265:   struct {
266:     PetscReal irefgam, eps2, exponent, refvel, epsvel;
267:   } friction;
268:   PetscReal rhog;
269:   PetscBool no_slip;
270:   PetscBool tridiagonal;
271:   PetscBool coarse2d;
272:   PetscBool verbose;
273:   MatType   mattype;
274: };

276: struct _n_Units {
277:   /* fundamental */
278:   PetscReal meter;
279:   PetscReal kilogram;
280:   PetscReal second;
281:   /* derived */
282:   PetscReal Pascal;
283:   PetscReal year;
284: };

286: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI);
287: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI);
288: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI);

290: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
291: {
292:   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
293:                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
294:   for (PetscInt i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
295: }

297: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
298: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
299: {
300:   Units     units = thi->units;
301:   PetscReal s     = -x * PetscSinReal(thi->alpha);

303:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
304:   p->h     = s - p->b;
305:   p->beta2 = 1e30;
306: }

308: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
309: {
310:   Units     units = thi->units;
311:   PetscReal s     = -x * PetscSinReal(thi->alpha);

313:   p->b = s - 1000 * units->meter;
314:   p->h = s - p->b;
315:   /* tau_b = beta2 v   is a stress (Pa) */
316:   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter;
317: }

319: /* These are just toys */

321: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
322: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
323: {
324:   Units     units = thi->units;
325:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
326:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
327:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
328:   p->h     = s - p->b;
329:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
330: }

332: /* Like Z, but with 200 meter cliffs */
333: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
334: {
335:   Units     units = thi->units;
336:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
337:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

339:   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
340:   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
341:   p->h     = s - p->b;
342:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
343: }

345: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
346: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
347: {
348:   Units     units = thi->units;
349:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
350:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

352:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
353:   p->h     = s - p->b;
354:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
355: }

357: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
358: {
359:   if (thi->friction.irefgam == 0) {
360:     Units units           = thi->units;
361:     thi->friction.irefgam = 1. / (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year));
362:     thi->friction.eps2    = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
363:   }
364:   if (thi->friction.exponent == 0) {
365:     *beta2  = rbeta2;
366:     *dbeta2 = 0;
367:   } else {
368:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
369:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
370:   }
371: }

373: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
374: {
375:   PetscReal Bd2, eps, exponent;
376:   if (thi->viscosity.Bd2 == 0) {
377:     Units           units   = thi->units;
378:     const PetscReal n       = 3.,                                                     /* Glen exponent */
379:       p                     = 1. + 1. / n,                                            /* for Stokes */
380:       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
381:       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
382:     thi->viscosity.Bd2      = B / 2;
383:     thi->viscosity.exponent = (p - 2) / 2;
384:     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
385:   }
386:   Bd2      = thi->viscosity.Bd2;
387:   exponent = thi->viscosity.exponent;
388:   eps      = thi->viscosity.eps;
389:   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
390:   *deta    = exponent * (*eta) / (eps + gam);
391: }

393: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
394: {
395:   if (x < *min) *min = x;
396:   if (x > *max) *max = x;
397: }

399: static void PRangeClear(PRange *p)
400: {
401:   p->cmin = p->min = 1e100;
402:   p->cmax = p->max = -1e100;
403: }

405: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
406: {
407:   PetscFunctionBeginUser;
408:   p->cmin = min;
409:   p->cmax = max;
410:   if (min < p->min) p->min = min;
411:   if (max > p->max) p->max = max;
412:   PetscFunctionReturn(PETSC_SUCCESS);
413: }

415: static PetscErrorCode THIDestroy(THI *thi)
416: {
417:   PetscFunctionBeginUser;
418:   if (!*thi) PetscFunctionReturn(PETSC_SUCCESS);
419:   if (--((PetscObject)*thi)->refct > 0) {
420:     *thi = 0;
421:     PetscFunctionReturn(PETSC_SUCCESS);
422:   }
423:   PetscCall(PetscFree((*thi)->units));
424:   PetscCall(PetscFree((*thi)->mattype));
425:   PetscCall(PetscHeaderDestroy(thi));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }

429: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
430: {
431:   static PetscBool registered = PETSC_FALSE;
432:   THI              thi;
433:   Units            units;

435:   PetscFunctionBeginUser;
436:   *inthi = 0;
437:   if (!registered) {
438:     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
439:     registered = PETSC_TRUE;
440:   }
441:   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0));

443:   PetscCall(PetscNew(&thi->units));
444:   units           = thi->units;
445:   units->meter    = 1e-2;
446:   units->second   = 1e-7;
447:   units->kilogram = 1e-12;

449:   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
450:   {
451:     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
452:     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
453:     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
454:   }
455:   PetscOptionsEnd();
456:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
457:   units->year   = 31556926. * units->second; /* seconds per year */

459:   thi->Lx              = 10.e3;
460:   thi->Ly              = 10.e3;
461:   thi->Lz              = 1000;
462:   thi->dirichlet_scale = 1;
463:   thi->verbose         = PETSC_FALSE;

465:   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
466:   {
467:     QuadratureType quad       = QUAD_GAUSS;
468:     char           homexp[]   = "A";
469:     char           mtype[256] = MATSBAIJ;
470:     PetscReal      L, m = 1.0;
471:     PetscBool      flg;
472:     L = thi->Lx;
473:     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
474:     if (flg) thi->Lx = thi->Ly = L;
475:     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
476:     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
477:     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
478:     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
479:     switch (homexp[0] = (char)toupper(homexp[0])) {
480:     case 'A':
481:       thi->initialize = THIInitialize_HOM_A;
482:       thi->no_slip    = PETSC_TRUE;
483:       thi->alpha      = 0.5;
484:       break;
485:     case 'C':
486:       thi->initialize = THIInitialize_HOM_C;
487:       thi->no_slip    = PETSC_FALSE;
488:       thi->alpha      = 0.1;
489:       break;
490:     case 'X':
491:       thi->initialize = THIInitialize_HOM_X;
492:       thi->no_slip    = PETSC_FALSE;
493:       thi->alpha      = 0.3;
494:       break;
495:     case 'Y':
496:       thi->initialize = THIInitialize_HOM_Y;
497:       thi->no_slip    = PETSC_FALSE;
498:       thi->alpha      = 0.5;
499:       break;
500:     case 'Z':
501:       thi->initialize = THIInitialize_HOM_Z;
502:       thi->no_slip    = PETSC_FALSE;
503:       thi->alpha      = 0.5;
504:       break;
505:     default:
506:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
507:     }
508:     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
509:     switch (quad) {
510:     case QUAD_GAUSS:
511:       HexQInterp = HexQInterp_Gauss;
512:       HexQDeriv  = HexQDeriv_Gauss;
513:       break;
514:     case QUAD_LOBATTO:
515:       HexQInterp = HexQInterp_Lobatto;
516:       HexQDeriv  = HexQDeriv_Lobatto;
517:       break;
518:     }
519:     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));

521:     thi->friction.refvel = 100.;
522:     thi->friction.epsvel = 1.;

524:     PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL));
525:     PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL));
526:     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));

528:     thi->friction.exponent = (m - 1) / 2;

530:     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
531:     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
532:     PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL));
533:     PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL));
534:     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
535:     PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype));
536:     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
537:   }
538:   PetscOptionsEnd();

540:   /* dimensionalize */
541:   thi->Lx *= units->meter;
542:   thi->Ly *= units->meter;
543:   thi->Lz *= units->meter;
544:   thi->alpha *= PETSC_PI / 180;

546:   PRangeClear(&thi->eta);
547:   PRangeClear(&thi->beta2);

549:   {
550:     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
551:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
552:     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
553:     thi->rhog = rho * grav;
554:     if (thi->verbose) {
555:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
556:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
557:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
558:       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
559:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
560:     }
561:   }

563:   *inthi = thi;
564:   PetscFunctionReturn(PETSC_SUCCESS);
565: }

567: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm)
568: {
569:   PrmNode **p;
570:   PetscInt  i, j, xs, xm, ys, ym, mx, my;

572:   PetscFunctionBeginUser;
573:   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
574:   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
575:   PetscCall(DMDAVecGetArray(da2prm, prm, &p));
576:   for (i = xs; i < xs + xm; i++) {
577:     for (j = ys; j < ys + ym; j++) {
578:       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
579:       thi->initialize(thi, xx, yy, &p[i][j]);
580:     }
581:   }
582:   PetscCall(DMDAVecRestoreArray(da2prm, prm, &p));
583:   PetscFunctionReturn(PETSC_SUCCESS);
584: }

586: static PetscErrorCode THISetUpDM(THI thi, DM dm)
587: {
588:   PetscInt        refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s;
589:   DMDAStencilType st;
590:   DM              da2prm;
591:   Vec             X;

593:   PetscFunctionBeginUser;
594:   PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
595:   if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st));
596:   PetscCall(DMGetRefineLevel(dm, &refinelevel));
597:   PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel));
598:   level = refinelevel - coarsenlevel;
599:   PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm));
600:   PetscCall(DMSetUp(da2prm));
601:   PetscCall(DMCreateLocalVector(da2prm, &X));
602:   {
603:     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
604:     if (dim == 2) {
605:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My)));
606:     } else {
607:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000. / (Mz - 1))));
608:     }
609:   }
610:   PetscCall(THIInitializePrm(thi, da2prm, X));
611:   if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
612:     PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
613:   }
614:   if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_2D, thi));
615:   PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm));
616:   PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X));
617:   PetscCall(DMDestroy(&da2prm));
618:   PetscCall(VecDestroy(&X));
619:   PetscFunctionReturn(PETSC_SUCCESS);
620: }

622: static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, PetscCtx ctx)
623: {
624:   THI      thi = (THI)ctx;
625:   PetscInt rlevel, clevel;

627:   PetscFunctionBeginUser;
628:   PetscCall(THISetUpDM(thi, dmc));
629:   PetscCall(DMGetRefineLevel(dmc, &rlevel));
630:   PetscCall(DMGetCoarsenLevel(dmc, &clevel));
631:   if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ));
632:   PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi));
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }

636: static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, PetscCtx ctx)
637: {
638:   THI thi = (THI)ctx;

640:   PetscFunctionBeginUser;
641:   PetscCall(THISetUpDM(thi, dmf));
642:   PetscCall(DMSetMatType(dmf, thi->mattype));
643:   PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi));
644:   /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
645:   PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi));
646:   PetscFunctionReturn(PETSC_SUCCESS);
647: }

649: static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm)
650: {
651:   DM  da2prm;
652:   Vec X;

654:   PetscFunctionBeginUser;
655:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
656:   PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
657:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
658:   PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
659:   PetscCall(DMDAVecGetArray(da2prm, X, prm));
660:   PetscFunctionReturn(PETSC_SUCCESS);
661: }

663: static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm)
664: {
665:   DM  da2prm;
666:   Vec X;

668:   PetscFunctionBeginUser;
669:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
670:   PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
671:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
672:   PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
673:   PetscCall(DMDAVecRestoreArray(da2prm, X, prm));
674:   PetscFunctionReturn(PETSC_SUCCESS);
675: }

677: static PetscErrorCode THIInitial(SNES snes, Vec X, PetscCtx ctx)
678: {
679:   THI       thi;
680:   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
681:   PetscReal hx, hy;
682:   PrmNode **prm;
683:   Node   ***x;
684:   DM        da;

686:   PetscFunctionBeginUser;
687:   PetscCall(SNESGetDM(snes, &da));
688:   PetscCall(DMGetApplicationContext(da, &thi));
689:   PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
690:   PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
691:   PetscCall(DMDAVecGetArray(da, X, &x));
692:   PetscCall(THIDAGetPrm(da, &prm));
693:   hx = thi->Lx / mx;
694:   hy = thi->Ly / my;
695:   for (i = xs; i < xs + xm; i++) {
696:     for (j = ys; j < ys + ym; j++) {
697:       for (k = zs; k < zs + zm; k++) {
698:         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
699:         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
700:         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
701:       }
702:     }
703:   }
704:   PetscCall(DMDAVecRestoreArray(da, X, &x));
705:   PetscCall(THIDARestorePrm(da, &prm));
706:   PetscFunctionReturn(PETSC_SUCCESS);
707: }

709: static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta)
710: {
711:   PetscInt    l, ll;
712:   PetscScalar gam;

714:   du[0] = du[1] = du[2] = 0;
715:   dv[0] = dv[1] = dv[2] = 0;
716:   *u                    = 0;
717:   *v                    = 0;
718:   for (l = 0; l < 8; l++) {
719:     *u += phi[l] * n[l].u;
720:     *v += phi[l] * n[l].v;
721:     for (ll = 0; ll < 3; ll++) {
722:       du[ll] += dphi[l][ll] * n[l].u;
723:       dv[ll] += dphi[l][ll] * n[l].v;
724:     }
725:   }
726:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]);
727:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
728: }

730: static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta)
731: {
732:   PetscInt    l, ll;
733:   PetscScalar gam;

735:   du[0] = du[1] = 0;
736:   dv[0] = dv[1] = 0;
737:   *u            = 0;
738:   *v            = 0;
739:   for (l = 0; l < 4; l++) {
740:     *u += phi[l] * n[l].u;
741:     *v += phi[l] * n[l].v;
742:     for (ll = 0; ll < 2; ll++) {
743:       du[ll] += dphi[l][ll] * n[l].u;
744:       dv[ll] += dphi[l][ll] * n[l].v;
745:     }
746:   }
747:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]);
748:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
749: }

751: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi)
752: {
753:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
754:   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
755:   PrmNode **prm;

757:   PetscFunctionBeginUser;
758:   xs = info->zs;
759:   ys = info->ys;
760:   xm = info->zm;
761:   ym = info->ym;
762:   zm = info->xm;
763:   hx = thi->Lx / info->mz;
764:   hy = thi->Ly / info->my;

766:   etamin   = 1e100;
767:   etamax   = 0;
768:   beta2min = 1e100;
769:   beta2max = 0;

771:   PetscCall(THIDAGetPrm(info->da, &prm));

773:   for (i = xs; i < xs + xm; i++) {
774:     for (j = ys; j < ys + ym; j++) {
775:       PrmNode pn[4];
776:       QuadExtract(prm, i, j, pn);
777:       for (k = 0; k < zm - 1; k++) {
778:         PetscInt  ls = 0;
779:         Node      n[8], *fn[8];
780:         PetscReal zn[8], etabase = 0;
781:         PrmHexGetZ(pn, k, zm, zn);
782:         HexExtract(x, i, j, k, n);
783:         HexExtractRef(f, i, j, k, fn);
784:         if (thi->no_slip && k == 0) {
785:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
786:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
787:           ls = 4;
788:         }
789:         for (q = 0; q < 8; q++) {
790:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
791:           PetscScalar du[3], dv[3], u, v;
792:           HexGrad(HexQDeriv[q], zn, dz);
793:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
794:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
795:           jw /= thi->rhog; /* scales residuals to be O(1) */
796:           if (q == 0) etabase = eta;
797:           RangeUpdate(&etamin, &etamax, eta);
798:           for (l = ls; l < 8; l++) { /* test functions */
799:             const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0};
800:             const PetscReal pp = phi[l], *dp = dphi[l];
801:             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
802:             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
803:           }
804:         }
805:         if (k == 0) { /* we are on a bottom face */
806:           if (thi->no_slip) {
807:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
808:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
809:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
810:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
811:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
812:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
813:             * assembled matrix (see the similar block in THIJacobianLocal).
814:             *
815:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
816:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
817:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
818:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
819:             */
820:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
821:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
822:             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
823:             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
824:           } else { /* Integrate over bottom face to apply boundary condition */
825:             for (q = 0; q < 4; q++) {
826:               const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
827:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
828:               PetscReal       beta2, dbeta2;
829:               for (l = 0; l < 4; l++) {
830:                 u += phi[l] * n[l].u;
831:                 v += phi[l] * n[l].v;
832:                 rbeta2 += phi[l] * pn[l].beta2;
833:               }
834:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
835:               RangeUpdate(&beta2min, &beta2max, beta2);
836:               for (l = 0; l < 4; l++) {
837:                 const PetscReal pp = phi[l];
838:                 fn[ls + l]->u += pp * jw * beta2 * u;
839:                 fn[ls + l]->v += pp * jw * beta2 * v;
840:               }
841:             }
842:           }
843:         }
844:       }
845:     }
846:   }

848:   PetscCall(THIDARestorePrm(info->da, &prm));

850:   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
851:   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
852:   PetscFunctionReturn(PETSC_SUCCESS);
853: }

855: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
856: {
857:   PetscReal   nrm;
858:   PetscInt    m;
859:   PetscMPIInt rank;

861:   PetscFunctionBeginUser;
862:   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
863:   PetscCall(MatGetSize(B, &m, 0));
864:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
865:   if (rank == 0) {
866:     PetscScalar val0, val2;
867:     PetscCall(MatGetValue(B, 0, 0, &val0));
868:     PetscCall(MatGetValue(B, 2, 2, &val2));
869:     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e  (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2),
870:                                      (double)thi->eta.cmin, (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
871:   }
872:   PetscFunctionReturn(PETSC_SUCCESS);
873: }

875: static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
876: {
877:   Node     ***x;
878:   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
879:   PetscReal   umin = 1e100, umax = -1e100;
880:   PetscScalar usum = 0.0, gusum;

882:   PetscFunctionBeginUser;
883:   *min = *max = *mean = 0;
884:   PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
885:   PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
886:   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition");
887:   PetscCall(DMDAVecGetArray(da, X, &x));
888:   for (i = xs; i < xs + xm; i++) {
889:     for (j = ys; j < ys + ym; j++) {
890:       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
891:       RangeUpdate(&umin, &umax, u);
892:       usum += u;
893:     }
894:   }
895:   PetscCall(DMDAVecRestoreArray(da, X, &x));
896:   PetscCallMPI(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da)));
897:   PetscCallMPI(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
898:   PetscCallMPI(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da)));
899:   *mean = PetscRealPart(gusum) / (mx * my);
900:   PetscFunctionReturn(PETSC_SUCCESS);
901: }

903: static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[])
904: {
905:   MPI_Comm comm;
906:   Vec      X;
907:   DM       dm;

909:   PetscFunctionBeginUser;
910:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
911:   PetscCall(SNESGetSolution(snes, &X));
912:   PetscCall(SNESGetDM(snes, &dm));
913:   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
914:   {
915:     PetscInt            its, lits;
916:     SNESConvergedReason reason;
917:     PetscCall(SNESGetIterationNumber(snes, &its));
918:     PetscCall(SNESGetConvergedReason(snes, &reason));
919:     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
920:     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
921:   }
922:   {
923:     PetscReal          nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
924:     PetscInt           i, j, m;
925:     const PetscScalar *x;
926:     PetscCall(VecNorm(X, NORM_2, &nrm2));
927:     PetscCall(VecGetLocalSize(X, &m));
928:     PetscCall(VecGetArrayRead(X, &x));
929:     for (i = 0; i < m; i += 2) {
930:       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
931:       tmin[0] = PetscMin(u, tmin[0]);
932:       tmin[1] = PetscMin(v, tmin[1]);
933:       tmin[2] = PetscMin(c, tmin[2]);
934:       tmax[0] = PetscMax(u, tmax[0]);
935:       tmax[1] = PetscMax(v, tmax[1]);
936:       tmax[2] = PetscMax(c, tmax[2]);
937:     }
938:     PetscCall(VecRestoreArrayRead(X, &x));
939:     PetscCallMPI(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
940:     PetscCallMPI(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
941:     /* Dimensionalize to meters/year */
942:     nrm2 *= thi->units->year / thi->units->meter;
943:     for (j = 0; j < 3; j++) {
944:       min[j] *= thi->units->year / thi->units->meter;
945:       max[j] *= thi->units->year / thi->units->meter;
946:     }
947:     if (min[0] == 0.0) min[0] = 0.0;
948:     PetscCall(PetscPrintf(comm, "|X|_2 %g   %g <= u <=  %g   %g <= v <=  %g   %g <= c <=  %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
949:     {
950:       PetscReal umin, umax, umean;
951:       PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean));
952:       umin *= thi->units->year / thi->units->meter;
953:       umax *= thi->units->year / thi->units->meter;
954:       umean *= thi->units->year / thi->units->meter;
955:       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
956:     }
957:     /* These values stay nondimensional */
958:     PetscCall(PetscPrintf(comm, "Global eta range   %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
959:     PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
960:   }
961:   PetscFunctionReturn(PETSC_SUCCESS);
962: }

964: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi)
965: {
966:   PetscInt  xs, ys, xm, ym, i, j, q, l, ll;
967:   PetscReal hx, hy;
968:   PrmNode **prm;

970:   PetscFunctionBeginUser;
971:   xs = info->ys;
972:   ys = info->xs;
973:   xm = info->ym;
974:   ym = info->xm;
975:   hx = thi->Lx / info->my;
976:   hy = thi->Ly / info->mx;

978:   PetscCall(MatZeroEntries(B));
979:   PetscCall(THIDAGetPrm(info->da, &prm));

981:   for (i = xs; i < xs + xm; i++) {
982:     for (j = ys; j < ys + ym; j++) {
983:       Node        n[4];
984:       PrmNode     pn[4];
985:       PetscScalar Ke[4 * 2][4 * 2];
986:       QuadExtract(prm, i, j, pn);
987:       QuadExtract(x, i, j, n);
988:       PetscCall(PetscMemzero(Ke, sizeof(Ke)));
989:       for (q = 0; q < 4; q++) {
990:         PetscReal   phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2;
991:         PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0;
992:         for (l = 0; l < 4; l++) {
993:           phi[l]     = QuadQInterp[q][l];
994:           dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx;
995:           dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy;
996:           h += phi[l] * pn[l].h;
997:           rbeta2 += phi[l] * pn[l].beta2;
998:         }
999:         jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */
1000:         PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1001:         THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1002:         for (l = 0; l < 4; l++) {
1003:           const PetscReal pp = phi[l], *dp = dphi[l];
1004:           for (ll = 0; ll < 4; ll++) {
1005:             const PetscReal ppl = phi[ll], *dpl = dphi[ll];
1006:             PetscScalar     dgdu, dgdv;
1007:             dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1];
1008:             dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0];
1009:             /* Picard part */
1010:             Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1011:             Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1012:             Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1013:             Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1014:             /* extra Newton terms */
1015:             Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale;
1016:             Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale;
1017:             Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale;
1018:             Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale;
1019:           }
1020:         }
1021:       }
1022:       {
1023:         const MatStencil rc[4] = {
1024:           {0, i,     j,     0},
1025:           {0, i + 1, j,     0},
1026:           {0, i + 1, j + 1, 0},
1027:           {0, i,     j + 1, 0}
1028:         };
1029:         PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES));
1030:       }
1031:     }
1032:   }
1033:   PetscCall(THIDARestorePrm(info->da, &prm));

1035:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1036:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1037:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1038:   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1039:   PetscFunctionReturn(PETSC_SUCCESS);
1040: }

1042: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode)
1043: {
1044:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1045:   PetscReal hx, hy;
1046:   PrmNode **prm;

1048:   PetscFunctionBeginUser;
1049:   xs = info->zs;
1050:   ys = info->ys;
1051:   xm = info->zm;
1052:   ym = info->ym;
1053:   zm = info->xm;
1054:   hx = thi->Lx / info->mz;
1055:   hy = thi->Ly / info->my;

1057:   PetscCall(MatZeroEntries(B));
1058:   PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
1059:   PetscCall(THIDAGetPrm(info->da, &prm));

1061:   for (i = xs; i < xs + xm; i++) {
1062:     for (j = ys; j < ys + ym; j++) {
1063:       PrmNode pn[4];
1064:       QuadExtract(prm, i, j, pn);
1065:       for (k = 0; k < zm - 1; k++) {
1066:         Node        n[8];
1067:         PetscReal   zn[8], etabase = 0;
1068:         PetscScalar Ke[8 * 2][8 * 2];
1069:         PetscInt    ls = 0;

1071:         PrmHexGetZ(pn, k, zm, zn);
1072:         HexExtract(x, i, j, k, n);
1073:         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1074:         if (thi->no_slip && k == 0) {
1075:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1076:           ls = 4;
1077:         }
1078:         for (q = 0; q < 8; q++) {
1079:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1080:           PetscScalar du[3], dv[3], u, v;
1081:           HexGrad(HexQDeriv[q], zn, dz);
1082:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1083:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1084:           jw /= thi->rhog; /* residuals are scaled by this factor */
1085:           if (q == 0) etabase = eta;
1086:           for (l = ls; l < 8; l++) { /* test functions */
1087:             const PetscReal *PETSC_RESTRICT dp = dphi[l];
1088: #if USE_SSE2_KERNELS
1089:             /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1090:             __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta), p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */
1091:               p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */
1092:               pdu2dv2    = _mm_unpacklo_pd(du2, dv2),                            /* [du2, dv2] */
1093:               du1pdv0    = _mm_add_pd(du1, dv0),                                 /* du1 + dv0 */
1094:               t1         = _mm_mul_pd(dp0, p4du0p2dv1),                          /* dp0 (4 du0 + 2 dv1) */
1095:               t2         = _mm_mul_pd(dp1, p4dv1p2du0);                          /* dp1 (4 dv1 + 2 du0) */

1097: #endif
1098: #if defined(COMPUTE_LOWER_TRIANGULAR)     /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1099:             for (ll = ls; ll < 8; ll++) { /* trial functions */
1100: #else
1101:             for (ll = l; ll < 8; ll++) {
1102: #endif
1103:               const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1104:               if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */
1105: #if !USE_SSE2_KERNELS
1106:               /* The analytic Jacobian in nice, easy-to-read form */
1107:               {
1108:                 PetscScalar dgdu, dgdv;
1109:                 dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1110:                 dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1111:                 /* Picard part */
1112:                 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1113:                 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1114:                 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1115:                 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1116:                 /* extra Newton terms */
1117:                 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1118:                 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1119:                 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1120:                 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1121:               }
1122: #else
1123:               /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1124:               * benefit.  On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1125:               * by 25 to 30 percent. */
1126:               {
1127:                 __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv;
1128:                 keu    = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2))));
1129:                 kev    = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1))))));
1130:                 pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1131:                 t0     = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */
1132:                 t3     = _mm_mul_pd(t0, du1pdv0);    /* t0 (du1 + dv0) */
1133:                 _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2))))));
1134:                 _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2))))));
1135:               }
1136: #endif
1137:             }
1138:           }
1139:         }
1140:         if (k == 0) { /* on a bottom face */
1141:           if (thi->no_slip) {
1142:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1143:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1144:             Ke[0][0] = thi->dirichlet_scale * diagu;
1145:             Ke[1][1] = thi->dirichlet_scale * diagv;
1146:           } else {
1147:             for (q = 0; q < 4; q++) {
1148:               const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
1149:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1150:               PetscReal       beta2, dbeta2;
1151:               for (l = 0; l < 4; l++) {
1152:                 u += phi[l] * n[l].u;
1153:                 v += phi[l] * n[l].v;
1154:                 rbeta2 += phi[l] * pn[l].beta2;
1155:               }
1156:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1157:               for (l = 0; l < 4; l++) {
1158:                 const PetscReal pp = phi[l];
1159:                 for (ll = 0; ll < 4; ll++) {
1160:                   const PetscReal ppl = phi[ll];
1161:                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1162:                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1163:                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1164:                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1165:                 }
1166:               }
1167:             }
1168:           }
1169:         }
1170:         {
1171:           const MatStencil rc[8] = {
1172:             {i,     j,     k,     0},
1173:             {i + 1, j,     k,     0},
1174:             {i + 1, j + 1, k,     0},
1175:             {i,     j + 1, k,     0},
1176:             {i,     j,     k + 1, 0},
1177:             {i + 1, j,     k + 1, 0},
1178:             {i + 1, j + 1, k + 1, 0},
1179:             {i,     j + 1, k + 1, 0}
1180:           };
1181:           if (amode == THIASSEMBLY_TRIDIAGONAL) {
1182:             for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1183:               const PetscInt   l4     = l + 4;
1184:               const MatStencil rcl[2] = {
1185:                 {rc[l].k,  rc[l].j,  rc[l].i,  0},
1186:                 {rc[l4].k, rc[l4].j, rc[l4].i, 0}
1187:               };
1188: #if defined(COMPUTE_LOWER_TRIANGULAR)
1189:               const PetscScalar Kel[4][4] = {
1190:                 {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
1191:                 {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
1192:                 {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1193:                 {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1194:               };
1195: #else
1196:               /* Same as above except for the lower-left block */
1197:               const PetscScalar Kel[4][4] = {
1198:                 {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
1199:                 {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
1200:                 {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1201:                 {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1202:               };
1203: #endif
1204:               PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES));
1205:             }
1206:           } else {
1207: #if !defined(COMPUTE_LOWER_TRIANGULAR) /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1208:             for (l = 0; l < 8; l++) {
1209:               for (ll = l + 1; ll < 8; ll++) {
1210:                 Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1211:                 Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1212:                 Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1213:                 Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1214:               }
1215:             }
1216: #endif
1217:             PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES));
1218:           }
1219:         }
1220:       }
1221:     }
1222:   }
1223:   PetscCall(THIDARestorePrm(info->da, &prm));

1225:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1226:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1227:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1228:   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1229:   PetscFunctionReturn(PETSC_SUCCESS);
1230: }

1232: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1233: {
1234:   PetscFunctionBeginUser;
1235:   PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL));
1236:   PetscFunctionReturn(PETSC_SUCCESS);
1237: }

1239: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1240: {
1241:   PetscFunctionBeginUser;
1242:   PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL));
1243:   PetscFunctionReturn(PETSC_SUCCESS);
1244: }

1246: static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[])
1247: {
1248:   THI             thi;
1249:   PetscInt        dim, M, N, m, n, s, dof;
1250:   DM              dac, daf;
1251:   DMDAStencilType st;
1252:   DM_DA          *ddf, *ddc;

1254:   PetscFunctionBeginUser;
1255:   PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi));
1256:   PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance");
1257:   if (nlevels > 1) {
1258:     PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy));
1259:     dac = hierarchy[nlevels - 2];
1260:   } else {
1261:     dac = dac0;
1262:   }
1263:   PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st));
1264:   PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs");

1266:   /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1267:   PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf));
1268:   PetscCall(DMSetUp(daf));

1270:   daf->ops->creatematrix        = dac->ops->creatematrix;
1271:   daf->ops->createinterpolation = dac->ops->createinterpolation;
1272:   daf->ops->getcoloring         = dac->ops->getcoloring;
1273:   ddf                           = (DM_DA *)daf->data;
1274:   ddc                           = (DM_DA *)dac->data;
1275:   ddf->interptype               = ddc->interptype;

1277:   PetscCall(DMDASetFieldName(daf, 0, "x-velocity"));
1278:   PetscCall(DMDASetFieldName(daf, 1, "y-velocity"));

1280:   hierarchy[nlevels - 1] = daf;
1281:   PetscFunctionReturn(PETSC_SUCCESS);
1282: }

1284: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale)
1285: {
1286:   PetscInt dim;

1288:   PetscFunctionBeginUser;
1291:   PetscAssertPointer(A, 3);
1292:   if (scale) PetscAssertPointer(scale, 4);
1293:   PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1294:   if (dim == 2) {
1295:     /* We are in the 2D problem and use normal DMDA interpolation */
1296:     PetscCall(DMCreateInterpolation(dac, daf, A, scale));
1297:   } else {
1298:     PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart;
1299:     Mat      B;

1301:     PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1302:     PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm));
1303:     PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected");
1304:     PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B));
1305:     PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my));

1307:     PetscCall(MatSetType(B, MATAIJ));
1308:     PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL));
1309:     PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL));
1310:     PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1311:     PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL));
1312:     for (i = xs; i < xs + xm; i++) {
1313:       for (j = ys; j < ys + ym; j++) {
1314:         for (k = zs; k < zs + zm; k++) {
1315:           PetscInt    i2 = i * ym + j, i3 = i2 * zm + k;
1316:           PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) / (mz - 1.); /* Integration using trapezoid rule */
1317:           PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES));
1318:         }
1319:       }
1320:     }
1321:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1322:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1323:     PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A));
1324:     PetscCall(MatDestroy(&B));
1325:   }
1326:   PetscFunctionReturn(PETSC_SUCCESS);
1327: }

1329: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J)
1330: {
1331:   Mat                    A;
1332:   PetscInt               xm, ym, zm, dim, dof = 2, starts[3], dims[3];
1333:   ISLocalToGlobalMapping ltog;

1335:   PetscFunctionBeginUser;
1336:   PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1337:   PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D");
1338:   PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm));
1339:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1340:   PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A));
1341:   PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE));
1342:   PetscCall(MatSetType(A, da->mattype));
1343:   PetscCall(MatSetFromOptions(A));
1344:   PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL));
1345:   PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL));
1346:   PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL));
1347:   PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL));
1348:   PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL));
1349:   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL));
1350:   PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
1351:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
1352:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
1353:   *J = A;
1354:   PetscFunctionReturn(PETSC_SUCCESS);
1355: }

1357: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[])
1358: {
1359:   const PetscInt     dof   = 2;
1360:   Units              units = thi->units;
1361:   MPI_Comm           comm;
1362:   PetscViewer        viewer;
1363:   PetscMPIInt        rank, size, tag, nn, nmax;
1364:   PetscInt           mx, my, mz, range[6];
1365:   const PetscScalar *x;

1367:   PetscFunctionBeginUser;
1368:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1369:   PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1370:   PetscCallMPI(MPI_Comm_size(comm, &size));
1371:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1372:   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer));
1373:   PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1374:   PetscCall(PetscViewerASCIIPrintf(viewer, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));

1376:   PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1377:   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1378:   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1379:   tag = ((PetscObject)viewer)->tag;
1380:   PetscCall(VecGetArrayRead(X, &x));
1381:   if (rank == 0) {
1382:     PetscScalar *array;
1383:     PetscCall(PetscMalloc1(nmax, &array));
1384:     for (PetscMPIInt r = 0; r < size; r++) {
1385:       PetscInt           i, j, k, xs, xm, ys, ym, zs, zm;
1386:       const PetscScalar *ptr;
1387:       MPI_Status         status;
1388:       if (r) PetscCallMPI(MPIU_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1389:       zs = range[0];
1390:       ys = range[1];
1391:       xs = range[2];
1392:       zm = range[3];
1393:       ym = range[4];
1394:       xm = range[5];
1395:       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1396:       if (r) {
1397:         PetscCallMPI(MPIU_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1398:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1399:         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1400:         ptr = array;
1401:       } else ptr = x;
1402:       PetscCall(PetscViewerASCIIPrintf(viewer, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));

1404:       PetscCall(PetscViewerASCIIPrintf(viewer, "      <Points>\n"));
1405:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1406:       for (i = xs; i < xs + xm; i++) {
1407:         for (j = ys; j < ys + ym; j++) {
1408:           for (k = zs; k < zs + zm; k++) {
1409:             PrmNode   p;
1410:             PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz;
1411:             thi->initialize(thi, xx, yy, &p);
1412:             zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1);
1413:             PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1414:           }
1415:         }
1416:       }
1417:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));
1418:       PetscCall(PetscViewerASCIIPrintf(viewer, "      </Points>\n"));

1420:       PetscCall(PetscViewerASCIIPrintf(viewer, "      <PointData>\n"));
1421:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1422:       for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0));
1423:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));

1425:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1426:       for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%d\n", r));
1427:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));
1428:       PetscCall(PetscViewerASCIIPrintf(viewer, "      </PointData>\n"));

1430:       PetscCall(PetscViewerASCIIPrintf(viewer, "    </Piece>\n"));
1431:     }
1432:     PetscCall(PetscFree(array));
1433:   } else {
1434:     PetscCallMPI(MPIU_Send(range, 6, MPIU_INT, 0, tag, comm));
1435:     PetscCallMPI(MPIU_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm));
1436:   }
1437:   PetscCall(VecRestoreArrayRead(X, &x));
1438:   PetscCall(PetscViewerASCIIPrintf(viewer, "  </StructuredGrid>\n"));
1439:   PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n"));
1440:   PetscCall(PetscViewerDestroy(&viewer));
1441:   PetscFunctionReturn(PETSC_SUCCESS);
1442: }

1444: int main(int argc, char *argv[])
1445: {
1446:   MPI_Comm comm;
1447:   THI      thi;
1448:   DM       da;
1449:   SNES     snes;

1451:   PetscFunctionBeginUser;
1452:   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1453:   comm = PETSC_COMM_WORLD;

1455:   PetscCall(THICreate(comm, &thi));
1456:   {
1457:     PetscInt M = 3, N = 3, P = 2;
1458:     PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1459:     {
1460:       PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1461:       N = M;
1462:       PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1463:       if (thi->coarse2d) {
1464:         PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL));
1465:       } else {
1466:         PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1467:       }
1468:     }
1469:     PetscOptionsEnd();
1470:     if (thi->coarse2d) {
1471:       PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da));
1472:       PetscCall(DMSetFromOptions(da));
1473:       PetscCall(DMSetUp(da));
1474:       da->ops->refinehierarchy     = DMRefineHierarchy_THI;
1475:       da->ops->createinterpolation = DMCreateInterpolation_DA_THI;

1477:       PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi));
1478:     } else {
1479:       PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1480:       PetscCall(DMSetFromOptions(da));
1481:       PetscCall(DMSetUp(da));
1482:     }
1483:     PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1484:     PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1485:   }
1486:   PetscCall(THISetUpDM(thi, da));
1487:   if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;

1489:   { /* Set the fine level matrix type if -da_refine */
1490:     PetscInt rlevel, clevel;
1491:     PetscCall(DMGetRefineLevel(da, &rlevel));
1492:     PetscCall(DMGetCoarsenLevel(da, &clevel));
1493:     if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype));
1494:   }

1496:   PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunctionFn *)THIFunctionLocal, thi));
1497:   if (thi->tridiagonal) {
1498:     PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Tridiagonal, thi));
1499:   } else {
1500:     PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
1501:   }
1502:   PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi));
1503:   PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi));

1505:   PetscCall(DMSetApplicationContext(da, thi));

1507:   PetscCall(SNESCreate(comm, &snes));
1508:   PetscCall(SNESSetDM(snes, da));
1509:   PetscCall(DMDestroy(&da));
1510:   PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL));
1511:   PetscCall(SNESSetFromOptions(snes));

1513:   PetscCall(SNESSolve(snes, NULL, NULL));

1515:   PetscCall(THISolveStatistics(thi, snes, 0, "Full"));

1517:   {
1518:     PetscBool flg;
1519:     char      filename[PETSC_MAX_PATH_LEN] = "";
1520:     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1521:     if (flg) {
1522:       Vec X;
1523:       DM  dm;
1524:       PetscCall(SNESGetSolution(snes, &X));
1525:       PetscCall(SNESGetDM(snes, &dm));
1526:       PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename));
1527:     }
1528:   }

1530:   PetscCall(DMDestroy(&da));
1531:   PetscCall(SNESDestroy(&snes));
1532:   PetscCall(THIDestroy(&thi));
1533:   PetscCall(PetscFinalize());
1534:   return 0;
1535: }

1537: /*TEST

1539:    build:
1540:       requires: !single

1542:    test:
1543:       args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc

1545:    test:
1546:       suffix: 2
1547:       nsize: 2
1548:       args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol 0

1550:    test:
1551:       suffix: 3
1552:       nsize: 3
1553:       args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current

1555:    test:
1556:       suffix: 4
1557:       nsize: 6
1558:       args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol 0

1560:    test:
1561:       suffix: 5
1562:       nsize: 6
1563:       args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}

1565: TEST*/