Actual source code: ex48.c
1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2: \n\
3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4: using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponding\n\
5: to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments, which assume periodic\n\
6: boundary conditions in the x- and y-directions.\n\
7: \n\
8: Equations are rescaled so that the domain size and solution are O(1); the details of this scaling\n\
9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10: \n\
11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12: \n\n";
14: /*
15: The equations for horizontal velocity (u,v) are
17: - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18: - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
20: where
22: eta = B/2 (epsilon + gamma)^((p-2)/2)
24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25: written in terms of the second invariant
27: gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
29: The surface boundary conditions are the natural conditions. The basal boundary conditions
30: are either no-slip, or Navier (linear) slip with a spatially varying friction coefficient beta^2.
32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
34: The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the
35: map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36: the Jacobian of the coordinate transformation from a reference element to the physical element.
38: Since ice flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39: specially so that columns are never distributed, and are always contiguous in memory.
40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41: and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to
42: use a compatible domain decomposition relative to the 3D DMDAs.
44: There are two compile-time options:
46: NO_SSE2:
47: If the host supports SSE2, we use integration code that has been vectorized with SSE2
48: intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49: 30% on my architecture (P8700, gcc-4.5 snapshot).
51: COMPUTE_LOWER_TRIANGULAR:
52: The element matrices we assemble are symmetric, so it is not necessary to compute
53: all entries explicitly. If this macro is defined, the lower-triangular entries are
54: computed explicitly.
56: */
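/*
   A sketch of the weak form assembled in THIFunctionLocal() below, in the notation above: for each test
   function phi_l at a quadrature point with weight-times-Jacobian jw, the element residual receives

     f_u[l] += jw [ dphi_l . ( eta (4 u_x + 2 v_y), eta (u_y + v_x), eta u_z ) + phi_l rho g s_x ] / (rho g)
     f_v[l] += jw [ dphi_l . ( eta (u_y + v_x), eta (4 v_y + 2 u_x), eta v_z ) + phi_l rho g s_y ] / (rho g)

   where dphi_l is the physical gradient of phi_l produced by HexComputeGeometry().
*/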
58: #if defined(PETSC_APPLE_FRAMEWORK)
59: #import <PETSc/petscsnes.h>
60: #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
61: #else
63: #include <petscsnes.h>
64: #include <petsc/private/dmdaimpl.h>
65: #endif
66: #include <ctype.h> /* toupper() */
68: #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI)
69: /* C++ and some compilers cannot handle the [restrict] qualifier the way C does */
70: #undef PETSC_RESTRICT
71: #define PETSC_RESTRICT
72: #endif
74: #if defined __SSE2__
75: #include <emmintrin.h>
76: #endif
78: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
79: #if !defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && !defined PETSC_USE_REAL___FLOAT128 && !defined PETSC_USE_REAL___FP16 && defined __SSE2__
80: #define USE_SSE2_KERNELS 1
81: #else
82: #define USE_SSE2_KERNELS 0
83: #endif
85: static PetscClassId THI_CLASSID;
87: typedef enum {
88: QUAD_GAUSS,
89: QUAD_LOBATTO
90: } QuadratureType;
91: static const char *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
92: PETSC_UNUSED static const PetscReal HexQWeights[8] = {1, 1, 1, 1, 1, 1, 1, 1};
93: PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
94: #define G 0.57735026918962573
95: #define H (0.5 * (1. + G))
96: #define L (0.5 * (1. - G))
97: #define M (-0.5)
98: #define P (0.5)
99: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
100: static const PetscReal HexQInterp_Lobatto[8][8] = {
101: {H, 0, 0, 0, L, 0, 0, 0},
102: {0, H, 0, 0, 0, L, 0, 0},
103: {0, 0, H, 0, 0, 0, L, 0},
104: {0, 0, 0, H, 0, 0, 0, L},
105: {L, 0, 0, 0, H, 0, 0, 0},
106: {0, L, 0, 0, 0, H, 0, 0},
107: {0, 0, L, 0, 0, 0, H, 0},
108: {0, 0, 0, L, 0, 0, 0, H}
109: };
110: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111: {{M * H, M *H, M}, {P * H, 0, 0}, {0, 0, 0}, {0, P *H, 0}, {M * L, M *L, P}, {P * L, 0, 0}, {0, 0, 0}, {0, P *L, 0} },
112: {{M * H, 0, 0}, {P * H, M *H, M}, {0, P *H, 0}, {0, 0, 0}, {M * L, 0, 0}, {P * L, M *L, P}, {0, P *L, 0}, {0, 0, 0} },
113: {{0, 0, 0}, {0, M *H, 0}, {P * H, P *H, M}, {M * H, 0, 0}, {0, 0, 0}, {0, M *L, 0}, {P * L, P *L, P}, {M * L, 0, 0} },
114: {{0, M *H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P *H, M}, {0, M *L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P *L, P}},
115: {{M * L, M *L, M}, {P * L, 0, 0}, {0, 0, 0}, {0, P *L, 0}, {M * H, M *H, P}, {P * H, 0, 0}, {0, 0, 0}, {0, P *H, 0} },
116: {{M * L, 0, 0}, {P * L, M *L, M}, {0, P *L, 0}, {0, 0, 0}, {M * H, 0, 0}, {P * H, M *H, P}, {0, P *H, 0}, {0, 0, 0} },
117: {{0, 0, 0}, {0, M *L, 0}, {P * L, P *L, M}, {M * L, 0, 0}, {0, 0, 0}, {0, M *H, 0}, {P * H, P *H, P}, {M * H, 0, 0} },
118: {{0, M *L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P *L, M}, {0, M *H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P *H, P}}
119: };
120: /* Standard Gauss */
121: static const PetscReal HexQInterp_Gauss[8][8] = {
122: {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
123: {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
124: {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
125: {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
126: {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
127: {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
128: {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
129: {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
130: };
131: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
132: {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
133: {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
134: {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
135: {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
136: {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
137: {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
138: {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
139: {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
140: };
141: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
142: /* Standard 2x2 Gauss quadrature for the bottom layer. */
143: static const PetscReal QuadQInterp[4][4] = {
144: {H * H, L *H, L *L, H *L},
145: {L * H, H *H, H *L, L *L},
146: {L * L, H *L, H *H, L *H},
147: {H * L, L *L, L *H, H *H}
148: };
149: static const PetscReal QuadQDeriv[4][4][2] = {
150: {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
151: {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
152: {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
153: {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
154: };
155: #undef G
156: #undef H
157: #undef L
158: #undef M
159: #undef P
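/* Gather the 8 nodal values of hexahedral element (i,j,k): the four nodes of the bottom face (level k),
   ordered counterclockwise, followed by the corresponding four nodes of the top face (level k+1). */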
161: #define HexExtract(x, i, j, k, n) \
162: do { \
163: (n)[0] = (x)[i][j][k]; \
164: (n)[1] = (x)[i + 1][j][k]; \
165: (n)[2] = (x)[i + 1][j + 1][k]; \
166: (n)[3] = (x)[i][j + 1][k]; \
167: (n)[4] = (x)[i][j][k + 1]; \
168: (n)[5] = (x)[i + 1][j][k + 1]; \
169: (n)[6] = (x)[i + 1][j + 1][k + 1]; \
170: (n)[7] = (x)[i][j + 1][k + 1]; \
171: } while (0)
173: #define HexExtractRef(x, i, j, k, n) \
174: do { \
175: (n)[0] = &(x)[i][j][k]; \
176: (n)[1] = &(x)[i + 1][j][k]; \
177: (n)[2] = &(x)[i + 1][j + 1][k]; \
178: (n)[3] = &(x)[i][j + 1][k]; \
179: (n)[4] = &(x)[i][j][k + 1]; \
180: (n)[5] = &(x)[i + 1][j][k + 1]; \
181: (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
182: (n)[7] = &(x)[i][j + 1][k + 1]; \
183: } while (0)
185: #define QuadExtract(x, i, j, n) \
186: do { \
187: (n)[0] = (x)[i][j]; \
188: (n)[1] = (x)[i + 1][j]; \
189: (n)[2] = (x)[i + 1][j + 1]; \
190: (n)[3] = (x)[i][j + 1]; \
191: } while (0)
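/* Gradient of the element z-coordinate field at a quadrature point, dz[c] = sum_i dphi[i][c] * zn[i];
   used by HexComputeGeometry() to build the geometric Jacobian. */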
193: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
194: {
195: PetscInt i;
196: dz[0] = dz[1] = dz[2] = 0;
197: for (i = 0; i < 8; i++) {
198: dz[0] += dphi[i][0] * zn[i];
199: dz[1] += dphi[i][1] * zn[i];
200: dz[2] += dphi[i][2] * zn[i];
201: }
202: }
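/* Build the reference-to-physical map at quadrature point q: the map is affine with spacings hx,hy in the
   (x,y) plane and follows the (possibly bumpy) bed and surface in z through dz. The analytic inverse ijac
   converts reference-element basis gradients into physical gradients, and jw is the Jacobian determinant
   (the quadrature weights are all 1, see HexQWeights). */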
204: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw)
205: {
206: const PetscReal jac[3][3] = {
207: {hx / 2, 0, 0 },
208: {0, hy / 2, 0 },
209: {dz[0], dz[1], dz[2]}
210: };
211: const PetscReal ijac[3][3] = {
212: {1 / jac[0][0], 0, 0 },
213: {0, 1 / jac[1][1], 0 },
214: {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}
215: };
216: const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2];
217: PetscInt i;
219: for (i = 0; i < 8; i++) {
220: const PetscReal *dphir = HexQDeriv[q][i];
221: phi[i] = HexQInterp[q][i];
222: dphi[i][0] = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
223: dphi[i][1] = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
224: dphi[i][2] = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
225: }
226: *jw = 1.0 * jdet;
227: }
229: typedef struct _p_THI *THI;
230: typedef struct _n_Units *Units;
232: typedef struct {
233: PetscScalar u, v;
234: } Node;
236: typedef struct {
237: PetscScalar b; /* bed */
238: PetscScalar h; /* thickness */
239: PetscScalar beta2; /* friction */
240: } PrmNode;
242: typedef struct {
243: PetscReal min, max, cmin, cmax;
244: } PRange;
246: typedef enum {
247: THIASSEMBLY_TRIDIAGONAL,
248: THIASSEMBLY_FULL
249: } THIAssemblyMode;
251: struct _p_THI {
252: PETSCHEADER(int);
253: void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
254: PetscInt zlevels;
255: PetscReal Lx, Ly, Lz; /* Model domain */
256: PetscReal alpha; /* Bed angle */
257: Units units;
258: PetscReal dirichlet_scale;
259: PetscReal ssa_friction_scale;
260: PRange eta;
261: PRange beta2;
262: struct {
263: PetscReal Bd2, eps, exponent;
264: } viscosity;
265: struct {
266: PetscReal irefgam, eps2, exponent, refvel, epsvel;
267: } friction;
268: PetscReal rhog;
269: PetscBool no_slip;
270: PetscBool tridiagonal;
271: PetscBool coarse2d;
272: PetscBool verbose;
273: MatType mattype;
274: };
276: struct _n_Units {
277: /* fundamental */
278: PetscReal meter;
279: PetscReal kilogram;
280: PetscReal second;
281: /* derived */
282: PetscReal Pascal;
283: PetscReal year;
284: };
286: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI);
287: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI);
288: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI);
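/* z-coordinates of the 8 nodes of element k in a column with zm levels: nodes are spaced evenly through the
   ice thickness, z = b + h * level/(zm-1), with level k on the bottom face and level k+1 on the top face. */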
290: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
291: {
292: const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1, pn[1].b + pn[1].h * (PetscScalar)k / zm1, pn[2].b + pn[2].h * (PetscScalar)k / zm1, pn[3].b + pn[3].h * (PetscScalar)k / zm1,
293: pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
294: PetscInt i;
295: for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
296: }
298: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
299: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
300: {
301: Units units = thi->units;
302: PetscReal s = -x * PetscSinReal(thi->alpha);
304: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
305: p->h = s - p->b;
306: p->beta2 = 1e30;
307: }
309: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
310: {
311: Units units = thi->units;
312: PetscReal s = -x * PetscSinReal(thi->alpha);
314: p->b = s - 1000 * units->meter;
315: p->h = s - p->b;
316: /* tau_b = beta2 v is a stress (Pa) */
317: p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter;
318: }
320: /* These are just toys */
322: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
323: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
324: {
325: Units units = thi->units;
326: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
327: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
328: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
329: p->h = s - p->b;
330: p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
331: }
333: /* Like test Z below, but with 200 meter cliffs */
334: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
335: {
336: Units units = thi->units;
337: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
338: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
340: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
341: if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
342: p->h = s - p->b;
343: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
344: }
346: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
347: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
348: {
349: Units units = thi->units;
350: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
351: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
353: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
354: p->h = s - p->b;
355: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
356: }
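/* Regularized sliding law tau_b = beta2(gamma_b) u with
     beta2 = rbeta2 (eps2 + gamma_b irefgam)^exponent,  exponent = (m-1)/2,
   where gamma_b = |u|^2/2 is the basal speed invariant. With the default m=1 this is linear (Navier) slip,
   beta2 = rbeta2; m=0 gives a Coulomb-type law (see -thi_friction_m). The reference and regularization
   constants are derived from refvel and epsvel lazily on first use. */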
358: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
359: {
360: if (thi->friction.irefgam == 0) {
361: Units units = thi->units;
362: thi->friction.irefgam = 1. / (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year));
363: thi->friction.eps2 = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
364: }
365: if (thi->friction.exponent == 0) {
366: *beta2 = rbeta2;
367: *dbeta2 = 0;
368: } else {
369: *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
370: *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
371: }
372: }
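/* Effective viscosity eta = B/2 (eps + gamma)^((p-2)/2) and its derivative with respect to gamma,
   deta = ((p-2)/2) eta / (eps + gamma). With Glen exponent n = 3, p = 1 + 1/n = 4/3, so the exponent is -1/3.
   The hardness B is derived from the softness A on first use. */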
374: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
375: {
376: PetscReal Bd2, eps, exponent;
377: if (thi->viscosity.Bd2 == 0) {
378: Units units = thi->units;
379: const PetscReal n = 3., /* Glen exponent */
380: p = 1. + 1. / n, /* for Stokes */
381: A = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
382: B = PetscPowReal(A, -1. / n); /* hardness parameter */
383: thi->viscosity.Bd2 = B / 2;
384: thi->viscosity.exponent = (p - 2) / 2;
385: thi->viscosity.eps = 0.5 * PetscSqr(1e-5 / units->year);
386: }
387: Bd2 = thi->viscosity.Bd2;
388: exponent = thi->viscosity.exponent;
389: eps = thi->viscosity.eps;
390: *eta = Bd2 * PetscPowReal(eps + gam, exponent);
391: *deta = exponent * (*eta) / (eps + gam);
392: }
394: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
395: {
396: if (x < *min) *min = x;
397: if (x > *max) *max = x;
398: }
400: static void PRangeClear(PRange *p)
401: {
402: p->cmin = p->min = 1e100;
403: p->cmax = p->max = -1e100;
404: }
406: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
407: {
408: PetscFunctionBeginUser;
409: p->cmin = min;
410: p->cmax = max;
411: if (min < p->min) p->min = min;
412: if (max > p->max) p->max = max;
413: PetscFunctionReturn(PETSC_SUCCESS);
414: }
416: static PetscErrorCode THIDestroy(THI *thi)
417: {
418: PetscFunctionBeginUser;
419: if (!*thi) PetscFunctionReturn(PETSC_SUCCESS);
420: if (--((PetscObject)*thi)->refct > 0) {
421: *thi = 0;
422: PetscFunctionReturn(PETSC_SUCCESS);
423: }
424: PetscCall(PetscFree((*thi)->units));
425: PetscCall(PetscFree((*thi)->mattype));
426: PetscCall(PetscHeaderDestroy(thi));
427: PetscFunctionReturn(PETSC_SUCCESS);
428: }
430: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
431: {
432: static PetscBool registered = PETSC_FALSE;
433: THI thi;
434: Units units;
436: PetscFunctionBeginUser;
437: *inthi = 0;
438: if (!registered) {
439: PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
440: registered = PETSC_TRUE;
441: }
442: PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0));
444: PetscCall(PetscNew(&thi->units));
445: units = thi->units;
446: units->meter = 1e-2;
447: units->second = 1e-7;
448: units->kilogram = 1e-12;
450: PetscOptionsBegin(comm, NULL, "Scaled units options", "");
451: {
452: PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
453: PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
454: PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
455: }
456: PetscOptionsEnd();
457: units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
458: units->year = 31556926. * units->second; /* seconds per year */
460: thi->Lx = 10.e3;
461: thi->Ly = 10.e3;
462: thi->Lz = 1000;
463: thi->dirichlet_scale = 1;
464: thi->verbose = PETSC_FALSE;
466: PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
467: {
468: QuadratureType quad = QUAD_GAUSS;
469: char homexp[] = "A";
470: char mtype[256] = MATSBAIJ;
471: PetscReal L, m = 1.0;
472: PetscBool flg;
473: L = thi->Lx;
474: PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
475: if (flg) thi->Lx = thi->Ly = L;
476: PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
477: PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
478: PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
479: PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
480: switch (homexp[0] = (char)toupper(homexp[0])) {
481: case 'A':
482: thi->initialize = THIInitialize_HOM_A;
483: thi->no_slip = PETSC_TRUE;
484: thi->alpha = 0.5;
485: break;
486: case 'C':
487: thi->initialize = THIInitialize_HOM_C;
488: thi->no_slip = PETSC_FALSE;
489: thi->alpha = 0.1;
490: break;
491: case 'X':
492: thi->initialize = THIInitialize_HOM_X;
493: thi->no_slip = PETSC_FALSE;
494: thi->alpha = 0.3;
495: break;
496: case 'Y':
497: thi->initialize = THIInitialize_HOM_Y;
498: thi->no_slip = PETSC_FALSE;
499: thi->alpha = 0.5;
500: break;
501: case 'Z':
502: thi->initialize = THIInitialize_HOM_Z;
503: thi->no_slip = PETSC_FALSE;
504: thi->alpha = 0.5;
505: break;
506: default:
507: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
508: }
509: PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
510: switch (quad) {
511: case QUAD_GAUSS:
512: HexQInterp = HexQInterp_Gauss;
513: HexQDeriv = HexQDeriv_Gauss;
514: break;
515: case QUAD_LOBATTO:
516: HexQInterp = HexQInterp_Lobatto;
517: HexQDeriv = HexQDeriv_Lobatto;
518: break;
519: }
520: PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
522: thi->friction.refvel = 100.;
523: thi->friction.epsvel = 1.;
525: PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL));
526: PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL));
527: PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
529: thi->friction.exponent = (m - 1) / 2;
531: PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
532: PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
533: PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL));
534: PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL));
535: PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
536: PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype));
537: PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
538: }
539: PetscOptionsEnd();
541: /* dimensionalize */
542: thi->Lx *= units->meter;
543: thi->Ly *= units->meter;
544: thi->Lz *= units->meter;
545: thi->alpha *= PETSC_PI / 180;
547: PRangeClear(&thi->eta);
548: PRangeClear(&thi->beta2);
550: {
551: PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
552: driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
553: THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
554: thi->rhog = rho * grav;
555: if (thi->verbose) {
556: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
557: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
558: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
559: THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
560: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
561: }
562: }
564: *inthi = thi;
565: PetscFunctionReturn(PETSC_SUCCESS);
566: }
568: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm)
569: {
570: PrmNode **p;
571: PetscInt i, j, xs, xm, ys, ym, mx, my;
573: PetscFunctionBeginUser;
574: PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
575: PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
576: PetscCall(DMDAVecGetArray(da2prm, prm, &p));
577: for (i = xs; i < xs + xm; i++) {
578: for (j = ys; j < ys + ym; j++) {
579: PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
580: thi->initialize(thi, xx, yy, &p[i][j]);
581: }
582: }
583: PetscCall(DMDAVecRestoreArray(da2prm, prm, &p));
584: PetscFunctionReturn(PETSC_SUCCESS);
585: }
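/* Attach a 2D parameter DMDA (bed, thickness, friction) to a velocity DM. The parameter DMDA uses the same
   map-plane sizes and process decomposition as the velocity DMDA, is filled through thi->initialize by
   THIInitializePrm(), and is composed with the DM together with a ghosted local Vec so that each multigrid
   level carries its own copy of the parameters. */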
587: static PetscErrorCode THISetUpDM(THI thi, DM dm)
588: {
589: PetscInt refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s;
590: DMDAStencilType st;
591: DM da2prm;
592: Vec X;
594: PetscFunctionBeginUser;
595: PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
596: if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st));
597: PetscCall(DMGetRefineLevel(dm, &refinelevel));
598: PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel));
599: level = refinelevel - coarsenlevel;
600: PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm));
601: PetscCall(DMSetUp(da2prm));
602: PetscCall(DMCreateLocalVector(da2prm, &X));
603: {
604: PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
605: if (dim == 2) {
606: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My)));
607: } else {
608: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000. / (Mz - 1))));
609: }
610: }
611: PetscCall(THIInitializePrm(thi, da2prm, X));
612: if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
613: PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
614: }
615: if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_2D, thi));
616: PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm));
617: PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X));
618: PetscCall(DMDestroy(&da2prm));
619: PetscCall(VecDestroy(&X));
620: PetscFunctionReturn(PETSC_SUCCESS);
621: }
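/* Coarsen/refine hooks: whenever PCMG coarsens this DM or grid sequencing refines it, set up the parameter
   DMDA on the new level and re-register the hooks so the setup propagates through the whole hierarchy.
   The coarsest level (refine level equal to coarsen level) is switched to AIJ, presumably so that standard
   coarse-grid solvers can be used regardless of the fine-level matrix type. */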
623: static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, void *ctx)
624: {
625: THI thi = (THI)ctx;
626: PetscInt rlevel, clevel;
628: PetscFunctionBeginUser;
629: PetscCall(THISetUpDM(thi, dmc));
630: PetscCall(DMGetRefineLevel(dmc, &rlevel));
631: PetscCall(DMGetCoarsenLevel(dmc, &clevel));
632: if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ));
633: PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi));
634: PetscFunctionReturn(PETSC_SUCCESS);
635: }
637: static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, void *ctx)
638: {
639: THI thi = (THI)ctx;
641: PetscFunctionBeginUser;
642: PetscCall(THISetUpDM(thi, dmf));
643: PetscCall(DMSetMatType(dmf, thi->mattype));
644: PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi));
645: /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
646: PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi));
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
650: static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm)
651: {
652: DM da2prm;
653: Vec X;
655: PetscFunctionBeginUser;
656: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
657: PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
658: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
659: PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
660: PetscCall(DMDAVecGetArray(da2prm, X, prm));
661: PetscFunctionReturn(PETSC_SUCCESS);
662: }
664: static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm)
665: {
666: DM da2prm;
667: Vec X;
669: PetscFunctionBeginUser;
670: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
671: PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
672: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
673: PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
674: PetscCall(DMDAVecRestoreArray(da2prm, X, prm));
675: PetscFunctionReturn(PETSC_SUCCESS);
676: }
678: static PetscErrorCode THIInitial(SNES snes, Vec X, void *ctx)
679: {
680: THI thi;
681: PetscInt i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
682: PetscReal hx, hy;
683: PrmNode **prm;
684: Node ***x;
685: DM da;
687: PetscFunctionBeginUser;
688: PetscCall(SNESGetDM(snes, &da));
689: PetscCall(DMGetApplicationContext(da, &thi));
690: PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
691: PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
692: PetscCall(DMDAVecGetArray(da, X, &x));
693: PetscCall(THIDAGetPrm(da, &prm));
694: hx = thi->Lx / mx;
695: hy = thi->Ly / my;
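  /* The initial guess is identically zero: the driving-stress expressions below are multiplied by 0.,
     presumably kept so that a nonzero column-shear initial guess can be re-enabled by removing that factor. */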
696: for (i = xs; i < xs + xm; i++) {
697: for (j = ys; j < ys + ym; j++) {
698: for (k = zs; k < zs + zm; k++) {
699: const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
700: x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
701: x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
702: }
703: }
704: }
705: PetscCall(DMDAVecRestoreArray(da, X, &x));
706: PetscCall(THIDARestorePrm(da, &prm));
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta)
711: {
712: PetscInt l, ll;
713: PetscScalar gam;
715: du[0] = du[1] = du[2] = 0;
716: dv[0] = dv[1] = dv[2] = 0;
717: *u = 0;
718: *v = 0;
719: for (l = 0; l < 8; l++) {
720: *u += phi[l] * n[l].u;
721: *v += phi[l] * n[l].v;
722: for (ll = 0; ll < 3; ll++) {
723: du[ll] += dphi[l][ll] * n[l].u;
724: dv[ll] += dphi[l][ll] * n[l].v;
725: }
726: }
727: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]);
728: THIViscosity(thi, PetscRealPart(gam), eta, deta);
729: }
731: static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta)
732: {
733: PetscInt l, ll;
734: PetscScalar gam;
736: du[0] = du[1] = 0;
737: dv[0] = dv[1] = 0;
738: *u = 0;
739: *v = 0;
740: for (l = 0; l < 4; l++) {
741: *u += phi[l] * n[l].u;
742: *v += phi[l] * n[l].v;
743: for (ll = 0; ll < 2; ll++) {
744: du[ll] += dphi[l][ll] * n[l].u;
745: dv[ll] += dphi[l][ll] * n[l].v;
746: }
747: }
748: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]);
749: THIViscosity(thi, PetscRealPart(gam), eta, deta);
750: }
752: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi)
753: {
754: PetscInt xs, ys, xm, ym, zm, i, j, k, q, l;
755: PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
756: PrmNode **prm;
758: PetscFunctionBeginUser;
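  /* As described at the top of this file, the DMDA's fastest-varying (internally "x") direction is the ice
     column, so the local info fields are remapped: info->zs/zm give the x range, info->ys/ym the y range,
     and info->xm is the number of levels in a column. */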
759: xs = info->zs;
760: ys = info->ys;
761: xm = info->zm;
762: ym = info->ym;
763: zm = info->xm;
764: hx = thi->Lx / info->mz;
765: hy = thi->Ly / info->my;
767: etamin = 1e100;
768: etamax = 0;
769: beta2min = 1e100;
770: beta2max = 0;
772: PetscCall(THIDAGetPrm(info->da, &prm));
774: for (i = xs; i < xs + xm; i++) {
775: for (j = ys; j < ys + ym; j++) {
776: PrmNode pn[4];
777: QuadExtract(prm, i, j, pn);
778: for (k = 0; k < zm - 1; k++) {
779: PetscInt ls = 0;
780: Node n[8], *fn[8];
781: PetscReal zn[8], etabase = 0;
782: PrmHexGetZ(pn, k, zm, zn);
783: HexExtract(x, i, j, k, n);
784: HexExtractRef(f, i, j, k, fn);
785: if (thi->no_slip && k == 0) {
786: for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
787: /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
788: ls = 4;
789: }
790: for (q = 0; q < 8; q++) {
791: PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta;
792: PetscScalar du[3], dv[3], u, v;
793: HexGrad(HexQDeriv[q], zn, dz);
794: HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
795: PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
796: jw /= thi->rhog; /* scales residuals to be O(1) */
797: if (q == 0) etabase = eta;
798: RangeUpdate(&etamin, &etamax, eta);
799: for (l = ls; l < 8; l++) { /* test functions */
800: const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0};
801: const PetscReal pp = phi[l], *dp = dphi[l];
802: fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
803: fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
804: }
805: }
806: if (k == 0) { /* we are on a bottom face */
807: if (thi->no_slip) {
808: /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
809: * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
810: * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
811: * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
812: * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
813: * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
814: * assembled matrix (see the similar block in THIJacobianLocal).
815: *
816: * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
817: * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make
818: * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part,
819: * so the solution will exactly satisfy the boundary condition after the first linear iteration.
820: */
821: const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1.);
822: const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
823: fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
824: fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
825: } else { /* Integrate over bottom face to apply boundary condition */
826: for (q = 0; q < 4; q++) {
827: const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
828: PetscScalar u = 0, v = 0, rbeta2 = 0;
829: PetscReal beta2, dbeta2;
830: for (l = 0; l < 4; l++) {
831: u += phi[l] * n[l].u;
832: v += phi[l] * n[l].v;
833: rbeta2 += phi[l] * pn[l].beta2;
834: }
835: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
836: RangeUpdate(&beta2min, &beta2max, beta2);
837: for (l = 0; l < 4; l++) {
838: const PetscReal pp = phi[l];
839: fn[ls + l]->u += pp * jw * beta2 * u;
840: fn[ls + l]->v += pp * jw * beta2 * v;
841: }
842: }
843: }
844: }
845: }
846: }
847: }
849: PetscCall(THIDARestorePrm(info->da, &prm));
851: PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
852: PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
853: PetscFunctionReturn(PETSC_SUCCESS);
854: }
856: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
857: {
858: PetscReal nrm;
859: PetscInt m;
860: PetscMPIInt rank;
862: PetscFunctionBeginUser;
863: PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
864: PetscCall(MatGetSize(B, &m, 0));
865: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
866: if (rank == 0) {
867: PetscScalar val0, val2;
868: PetscCall(MatGetValue(B, 0, 0, &val0));
869: PetscCall(MatGetValue(B, 2, 2, &val2));
870: PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2),
871: (double)thi->eta.cmin, (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
872: }
873: PetscFunctionReturn(PETSC_SUCCESS);
874: }
876: static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
877: {
878: Node ***x;
879: PetscInt i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
880: PetscReal umin = 1e100, umax = -1e100;
881: PetscScalar usum = 0.0, gusum;
883: PetscFunctionBeginUser;
884: *min = *max = *mean = 0;
885: PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
886: PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
887: PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition");
888: PetscCall(DMDAVecGetArray(da, X, &x));
889: for (i = xs; i < xs + xm; i++) {
890: for (j = ys; j < ys + ym; j++) {
891: PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
892: RangeUpdate(&umin, &umax, u);
893: usum += u;
894: }
895: }
896: PetscCall(DMDAVecRestoreArray(da, X, &x));
897: PetscCallMPI(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da)));
898: PetscCallMPI(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
899: PetscCallMPI(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da)));
900: *mean = PetscRealPart(gusum) / (mx * my);
901: PetscFunctionReturn(PETSC_SUCCESS);
902: }
904: static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[])
905: {
906: MPI_Comm comm;
907: Vec X;
908: DM dm;
910: PetscFunctionBeginUser;
911: PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
912: PetscCall(SNESGetSolution(snes, &X));
913: PetscCall(SNESGetDM(snes, &dm));
914: PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
915: {
916: PetscInt its, lits;
917: SNESConvergedReason reason;
918: PetscCall(SNESGetIterationNumber(snes, &its));
919: PetscCall(SNESGetConvergedReason(snes, &reason));
920: PetscCall(SNESGetLinearSolveIterations(snes, &lits));
921: PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
922: }
923: {
924: PetscReal nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
925: PetscInt i, j, m;
926: const PetscScalar *x;
927: PetscCall(VecNorm(X, NORM_2, &nrm2));
928: PetscCall(VecGetLocalSize(X, &m));
929: PetscCall(VecGetArrayRead(X, &x));
930: for (i = 0; i < m; i += 2) {
931: PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
932: tmin[0] = PetscMin(u, tmin[0]);
933: tmin[1] = PetscMin(v, tmin[1]);
934: tmin[2] = PetscMin(c, tmin[2]);
935: tmax[0] = PetscMax(u, tmax[0]);
936: tmax[1] = PetscMax(v, tmax[1]);
937: tmax[2] = PetscMax(c, tmax[2]);
938: }
939: PetscCall(VecRestoreArrayRead(X, &x));
940: PetscCallMPI(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
941: PetscCallMPI(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
942: /* Dimensionalize to meters/year */
943: nrm2 *= thi->units->year / thi->units->meter;
944: for (j = 0; j < 3; j++) {
945: min[j] *= thi->units->year / thi->units->meter;
946: max[j] *= thi->units->year / thi->units->meter;
947: }
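    /* presumably canonicalizes a negative zero so the printed minimum reads 0 rather than -0 */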
948: if (min[0] == 0.0) min[0] = 0.0;
949: PetscCall(PetscPrintf(comm, "|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
950: {
951: PetscReal umin, umax, umean;
952: PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean));
953: umin *= thi->units->year / thi->units->meter;
954: umax *= thi->units->year / thi->units->meter;
955: umean *= thi->units->year / thi->units->meter;
956: PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
957: }
958: /* These values stay nondimensional */
959: PetscCall(PetscPrintf(comm, "Global eta range %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
960: PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
961: }
962: PetscFunctionReturn(PETSC_SUCCESS);
963: }
965: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi)
966: {
967: PetscInt xs, ys, xm, ym, i, j, q, l, ll;
968: PetscReal hx, hy;
969: PrmNode **prm;
971: PetscFunctionBeginUser;
972: xs = info->ys;
973: ys = info->xs;
974: xm = info->ym;
975: ym = info->xm;
976: hx = thi->Lx / info->my;
977: hy = thi->Ly / info->mx;
979: PetscCall(MatZeroEntries(B));
980: PetscCall(THIDAGetPrm(info->da, &prm));
982: for (i = xs; i < xs + xm; i++) {
983: for (j = ys; j < ys + ym; j++) {
984: Node n[4];
985: PrmNode pn[4];
986: PetscScalar Ke[4 * 2][4 * 2];
987: QuadExtract(prm, i, j, pn);
988: QuadExtract(x, i, j, n);
989: PetscCall(PetscMemzero(Ke, sizeof(Ke)));
990: for (q = 0; q < 4; q++) {
991: PetscReal phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2;
992: PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0;
993: for (l = 0; l < 4; l++) {
994: phi[l] = QuadQInterp[q][l];
995: dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx;
996: dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy;
997: h += phi[l] * pn[l].h;
998: rbeta2 += phi[l] * pn[l].beta2;
999: }
1000: jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */
1001: PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1002: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1003: for (l = 0; l < 4; l++) {
1004: const PetscReal pp = phi[l], *dp = dphi[l];
1005: for (ll = 0; ll < 4; ll++) {
1006: const PetscReal ppl = phi[ll], *dpl = dphi[ll];
1007: PetscScalar dgdu, dgdv;
1008: dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1];
1009: dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0];
1010: /* Picard part */
1011: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1012: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1013: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1014: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1015: /* extra Newton terms */
1016: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale;
1017: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale;
1018: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale;
1019: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale;
1020: }
1021: }
1022: }
1023: {
1024: const MatStencil rc[4] = {
1025: {0, i, j, 0},
1026: {0, i + 1, j, 0},
1027: {0, i + 1, j + 1, 0},
1028: {0, i, j + 1, 0}
1029: };
1030: PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES));
1031: }
1032: }
1033: }
1034: PetscCall(THIDARestorePrm(info->da, &prm));
1036: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1037: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1038: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1039: if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1040: PetscFunctionReturn(PETSC_SUCCESS);
1041: }
1043: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode)
1044: {
1045: PetscInt xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1046: PetscReal hx, hy;
1047: PrmNode **prm;
1049: PetscFunctionBeginUser;
1050: xs = info->zs;
1051: ys = info->ys;
1052: xm = info->zm;
1053: ym = info->ym;
1054: zm = info->xm;
1055: hx = thi->Lx / info->mz;
1056: hy = thi->Ly / info->my;
1058: PetscCall(MatZeroEntries(B));
1059: PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
1060: PetscCall(THIDAGetPrm(info->da, &prm));
1062: for (i = xs; i < xs + xm; i++) {
1063: for (j = ys; j < ys + ym; j++) {
1064: PrmNode pn[4];
1065: QuadExtract(prm, i, j, pn);
1066: for (k = 0; k < zm - 1; k++) {
1067: Node n[8];
1068: PetscReal zn[8], etabase = 0;
1069: PetscScalar Ke[8 * 2][8 * 2];
1070: PetscInt ls = 0;
1072: PrmHexGetZ(pn, k, zm, zn);
1073: HexExtract(x, i, j, k, n);
1074: PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1075: if (thi->no_slip && k == 0) {
1076: for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1077: ls = 4;
1078: }
1079: for (q = 0; q < 8; q++) {
1080: PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta;
1081: PetscScalar du[3], dv[3], u, v;
1082: HexGrad(HexQDeriv[q], zn, dz);
1083: HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1084: PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1085: jw /= thi->rhog; /* residuals are scaled by this factor */
1086: if (q == 0) etabase = eta;
1087: for (l = ls; l < 8; l++) { /* test functions */
1088: const PetscReal *PETSC_RESTRICT dp = dphi[l];
1089: #if USE_SSE2_KERNELS
1090: /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1091: __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta), p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */
1092: p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */
1093: pdu2dv2 = _mm_unpacklo_pd(du2, dv2), /* [du2, dv2] */
1094: du1pdv0 = _mm_add_pd(du1, dv0), /* du1 + dv0 */
1095: t1 = _mm_mul_pd(dp0, p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1096: t2 = _mm_mul_pd(dp1, p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1098: #endif
1099: #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1100: for (ll = ls; ll < 8; ll++) { /* trial functions */
1101: #else
1102: for (ll = l; ll < 8; ll++) {
1103: #endif
1104: const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1105: if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */
1106: #if !USE_SSE2_KERNELS
1107: /* The analytic Jacobian in a nice, easy-to-read form */
1108: {
1109: PetscScalar dgdu, dgdv;
1110: dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1111: dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1112: /* Picard part */
1113: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1114: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1115: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1116: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1117: /* extra Newton terms */
1118: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1119: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1120: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1121: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1122: }
1123: #else
1124: /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1125: * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1126: * by 25 to 30 percent. */
1127: {
1128: __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv;
1129: keu = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2))));
1130: kev = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1))))));
1131: pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1132: t0 = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */
1133: t3 = _mm_mul_pd(t0, du1pdv0); /* t0 (du1 + dv0) */
1134: _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2))))));
1135: _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2))))));
1136: }
1137: #endif
1138: }
1139: }
1140: }
1141: if (k == 0) { /* on a bottom face */
1142: if (thi->no_slip) {
1143: const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1);
1144: const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1145: Ke[0][0] = thi->dirichlet_scale * diagu;
1146: Ke[1][1] = thi->dirichlet_scale * diagv;
1147: } else {
1148: for (q = 0; q < 4; q++) {
1149: const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
1150: PetscScalar u = 0, v = 0, rbeta2 = 0;
1151: PetscReal beta2, dbeta2;
1152: for (l = 0; l < 4; l++) {
1153: u += phi[l] * n[l].u;
1154: v += phi[l] * n[l].v;
1155: rbeta2 += phi[l] * pn[l].beta2;
1156: }
1157: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1158: for (l = 0; l < 4; l++) {
1159: const PetscReal pp = phi[l];
1160: for (ll = 0; ll < 4; ll++) {
1161: const PetscReal ppl = phi[ll];
1162: Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1163: Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1164: Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1165: Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1166: }
1167: }
1168: }
1169: }
1170: }
1171: {
1172: const MatStencil rc[8] = {
1173: {i, j, k, 0},
1174: {i + 1, j, k, 0},
1175: {i + 1, j + 1, k, 0},
1176: {i, j + 1, k, 0},
1177: {i, j, k + 1, 0},
1178: {i + 1, j, k + 1, 0},
1179: {i + 1, j + 1, k + 1, 0},
1180: {i, j + 1, k + 1, 0}
1181: };
1182: if (amode == THIASSEMBLY_TRIDIAGONAL) {
1183: for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1184: const PetscInt l4 = l + 4;
1185: const MatStencil rcl[2] = {
1186: {rc[l].k, rc[l].j, rc[l].i, 0},
1187: {rc[l4].k, rc[l4].j, rc[l4].i, 0}
1188: };
1189: #if defined COMPUTE_LOWER_TRIANGULAR
1190: const PetscScalar Kel[4][4] = {
1191: {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] },
1192: {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] },
1193: {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1194: {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1195: };
1196: #else
1197: /* Same as above except for the lower-left block */
1198: const PetscScalar Kel[4][4] = {
1199: {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] },
1200: {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] },
1201: {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1202: {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1203: };
1204: #endif
1205: PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES));
1206: }
1207: } else {
1208: #if !defined COMPUTE_LOWER_TRIANGULAR /* Fill in the lower-triangular part by symmetry; this is cheap compared to computing the entries */
1209: for (l = 0; l < 8; l++) {
1210: for (ll = l + 1; ll < 8; ll++) {
1211: Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1212: Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1213: Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1214: Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1215: }
1216: }
1217: #endif
1218: PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES));
1219: }
1220: }
1221: }
1222: }
1223: }
1224: PetscCall(THIDARestorePrm(info->da, &prm));
1226: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1227: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1228: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1229: if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1230: PetscFunctionReturn(PETSC_SUCCESS);
1231: }
1233: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1234: {
1235: PetscFunctionBeginUser;
1236: PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL));
1237: PetscFunctionReturn(PETSC_SUCCESS);
1238: }
1240: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1241: {
1242: PetscFunctionBeginUser;
1243: PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL));
1244: PetscFunctionReturn(PETSC_SUCCESS);
1245: }
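/* Builds the multigrid hierarchy when the coarse level is 2D: refine the 2D DMDA (which has the THI
   object composed on it) nlevels-1 times in the map plane, then create the finest level as a 3D DMDA
   with thi->zlevels points per column, the same map-plane layout, and a single process in the column
   direction. The matrix, interpolation, and coloring methods of the 2D DM are reused for the 3D DM. */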
1247: static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[])
1248: {
1249: THI thi;
1250: PetscInt dim, M, N, m, n, s, dof;
1251: DM dac, daf;
1252: DMDAStencilType st;
1253: DM_DA *ddf, *ddc;
1255: PetscFunctionBeginUser;
1256: PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi));
1257: PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance");
1258: if (nlevels > 1) {
1259: PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy));
1260: dac = hierarchy[nlevels - 2];
1261: } else {
1262: dac = dac0;
1263: }
1264: PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st));
1265: PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs");
1267: /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1268: PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf));
1269: PetscCall(DMSetUp(daf));
1271: daf->ops->creatematrix = dac->ops->creatematrix;
1272: daf->ops->createinterpolation = dac->ops->createinterpolation;
1273: daf->ops->getcoloring = dac->ops->getcoloring;
1274: ddf = (DM_DA *)daf->data;
1275: ddc = (DM_DA *)dac->data;
1276: ddf->interptype = ddc->interptype;
1278: PetscCall(DMDASetFieldName(daf, 0, "x-velocity"));
1279: PetscCall(DMDASetFieldName(daf, 1, "y-velocity"));
1281: hierarchy[nlevels - 1] = daf;
1282: PetscFunctionReturn(PETSC_SUCCESS);
1283: }
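/* Interpolation between levels of the hierarchy above. For two 2D levels the standard DMDA
   interpolation is used. Between the 2D and 3D levels, a 1-dof matrix with a single nonzero per fine
   (3D) point is built using trapezoid-rule weights, 0.5/(mz-1) at the bed and surface and 1/(mz-1) in
   the interior; the weights in each column sum to 1, so the transpose of this operator performs a
   trapezoid-rule vertical average. MatCreateMAIJ then expands it to both velocity components. */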
1285: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale)
1286: {
1287: PetscInt dim;
1289: PetscFunctionBeginUser;
1292: PetscAssertPointer(A, 3);
1293: if (scale) PetscAssertPointer(scale, 4);
1294: PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1295: if (dim == 2) {
1296: /* We are in the 2D problem and use normal DMDA interpolation */
1297: PetscCall(DMCreateInterpolation(dac, daf, A, scale));
1298: } else {
1299: PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart;
1300: Mat B;
1302: PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1303: PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm));
1304: PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected");
1305: PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B));
1306: PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my));
1308: PetscCall(MatSetType(B, MATAIJ));
1309: PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL));
1310: PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL));
1311: PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1312: PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL));
1313: for (i = xs; i < xs + xm; i++) {
1314: for (j = ys; j < ys + ym; j++) {
1315: for (k = zs; k < zs + zm; k++) {
1316: PetscInt i2 = i * ym + j, i3 = i2 * zm + k;
1317: PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) / (mz - 1.); /* Integration using trapezoid rule */
1318: PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES));
1319: }
1320: }
1321: }
1322: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1323: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1324: PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A));
1325: PetscCall(MatDestroy(&B));
1326: }
1327: PetscFunctionReturn(PETSC_SUCCESS);
1328: }
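/* Matrix creation used when thi->tridiagonal is set (see main below): only the vertical coupling
   within each column is preallocated, i.e. at most 3 blocks of size 2x2 per block row (the node and
   its neighbors above and below; 2 blocks for SBAIJ since only the upper triangle is stored). The
   local-to-global mapping and MatSetStencil are set by hand so that MatSetValuesBlockedStencil() can
   be used on a matrix that was not created by the DMDA itself. */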
1330: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J)
1331: {
1332: Mat A;
1333: PetscInt xm, ym, zm, dim, dof = 2, starts[3], dims[3];
1334: ISLocalToGlobalMapping ltog;
1336: PetscFunctionBeginUser;
1337: PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1338: PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D");
1339: PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm));
1340: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1341: PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A));
1342: PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE));
1343: PetscCall(MatSetType(A, da->mattype));
1344: PetscCall(MatSetFromOptions(A));
1345: PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL));
1346: PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL));
1347: PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL));
1348: PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL));
1349: PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL));
1350: PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL));
1351: PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
1352: PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
1353: PetscCall(MatSetStencil(A, dim, dims, starts, dof));
1354: *J = A;
1355: PetscFunctionReturn(PETSC_SUCCESS);
1356: }
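/* Write the solution as a VTK XML StructuredGrid (.vts). Rank 0 gathers each process's corner ranges
   and solution values and writes one <Piece> per rank; node coordinates are regenerated from the bed
   and thickness returned by thi->initialize(), and velocities are rescaled by units->year/units->meter
   before being written. */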
1358: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[])
1359: {
1360: const PetscInt dof = 2;
1361: Units units = thi->units;
1362: MPI_Comm comm;
1363: PetscViewer viewer;
1364: PetscMPIInt rank, size, tag, nn, nmax;
1365: PetscInt mx, my, mz, range[6];
1366: const PetscScalar *x;
1368: PetscFunctionBeginUser;
1369: PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1370: PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1371: PetscCallMPI(MPI_Comm_size(comm, &size));
1372: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1373: PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer));
1374: PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1375: PetscCall(PetscViewerASCIIPrintf(viewer, " <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1377: PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1378: PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1379: PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1380: tag = ((PetscObject)viewer)->tag;
1381: PetscCall(VecGetArrayRead(X, &x));
1382: if (rank == 0) {
1383: PetscScalar *array;
1384: PetscCall(PetscMalloc1(nmax, &array));
1385: for (PetscMPIInt r = 0; r < size; r++) {
1386: PetscInt i, j, k, xs, xm, ys, ym, zs, zm;
1387: const PetscScalar *ptr;
1388: MPI_Status status;
1389: if (r) PetscCallMPI(MPIU_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1390: zs = range[0];
1391: ys = range[1];
1392: xs = range[2];
1393: zm = range[3];
1394: ym = range[4];
1395: xm = range[5];
1396: PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1397: if (r) {
1398: PetscCallMPI(MPIU_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1399: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1400: PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1401: ptr = array;
1402: } else ptr = x;
1403: PetscCall(PetscViewerASCIIPrintf(viewer, " <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1405: PetscCall(PetscViewerASCIIPrintf(viewer, " <Points>\n"));
1406: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1407: for (i = xs; i < xs + xm; i++) {
1408: for (j = ys; j < ys + ym; j++) {
1409: for (k = zs; k < zs + zm; k++) {
1410: PrmNode p;
1411: PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz;
1412: thi->initialize(thi, xx, yy, &p);
1413: zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1);
1414: PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1415: }
1416: }
1417: }
1418: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1419: PetscCall(PetscViewerASCIIPrintf(viewer, " </Points>\n"));
1421: PetscCall(PetscViewerASCIIPrintf(viewer, " <PointData>\n"));
1422: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1423: for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0));
1424: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1426: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1427: for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%d\n", r));
1428: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1429: PetscCall(PetscViewerASCIIPrintf(viewer, " </PointData>\n"));
1431: PetscCall(PetscViewerASCIIPrintf(viewer, " </Piece>\n"));
1432: }
1433: PetscCall(PetscFree(array));
1434: } else {
1435: PetscCallMPI(MPIU_Send(range, 6, MPIU_INT, 0, tag, comm));
1436: PetscCallMPI(MPIU_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm));
1437: }
1438: PetscCall(VecRestoreArrayRead(X, &x));
1439: PetscCall(PetscViewerASCIIPrintf(viewer, " </StructuredGrid>\n"));
1440: PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n"));
1441: PetscCall(PetscViewerDestroy(&viewer));
1442: PetscFunctionReturn(PETSC_SUCCESS);
1443: }
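/* Driver: create the coarse DM (2D if thi->coarse2d, otherwise 3D), attach the THI residual and
   Jacobian callbacks plus the coarsen/refine hooks, and solve with SNES. The TEST block at the end of
   the file lists representative multigrid option combinations. */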
1445: int main(int argc, char *argv[])
1446: {
1447: MPI_Comm comm;
1448: THI thi;
1449: DM da;
1450: SNES snes;
1452: PetscFunctionBeginUser;
1453: PetscCall(PetscInitialize(&argc, &argv, 0, help));
1454: comm = PETSC_COMM_WORLD;
1456: PetscCall(THICreate(comm, &thi));
1457: {
1458: PetscInt M = 3, N = 3, P = 2;
1459: PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1460: {
1461: PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1462: N = M;
1463: PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1464: if (thi->coarse2d) {
1465: PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL));
1466: } else {
1467: PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1468: }
1469: }
1470: PetscOptionsEnd();
1471: if (thi->coarse2d) {
1472: PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da));
1473: PetscCall(DMSetFromOptions(da));
1474: PetscCall(DMSetUp(da));
1475: da->ops->refinehierarchy = DMRefineHierarchy_THI;
1476: da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1478: PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi));
1479: } else {
1480: PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1481: PetscCall(DMSetFromOptions(da));
1482: PetscCall(DMSetUp(da));
1483: }
1484: PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1485: PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1486: }
1487: PetscCall(THISetUpDM(thi, da));
1488: if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1490: { /* Set the fine level matrix type if -da_refine */
1491: PetscInt rlevel, clevel;
1492: PetscCall(DMGetRefineLevel(da, &rlevel));
1493: PetscCall(DMGetCoarsenLevel(da, &clevel));
1494: if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype));
1495: }
1497: PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunctionFn *)THIFunctionLocal, thi));
1498: if (thi->tridiagonal) {
1499: PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Tridiagonal, thi));
1500: } else {
1501: PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
1502: }
1503: PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi));
1504: PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi));
1506: PetscCall(DMSetApplicationContext(da, thi));
1508: PetscCall(SNESCreate(comm, &snes));
1509: PetscCall(SNESSetDM(snes, da));
1510: PetscCall(DMDestroy(&da));
1511: PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL));
1512: PetscCall(SNESSetFromOptions(snes));
1514: PetscCall(SNESSolve(snes, NULL, NULL));
1516: PetscCall(THISolveStatistics(thi, snes, 0, "Full"));
1518: {
1519: PetscBool flg;
1520: char filename[PETSC_MAX_PATH_LEN] = "";
1521: PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1522: if (flg) {
1523: Vec X;
1524: DM dm;
1525: PetscCall(SNESGetSolution(snes, &X));
1526: PetscCall(SNESGetDM(snes, &dm));
1527: PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename));
1528: }
1529: }
1531: PetscCall(DMDestroy(&da));
1532: PetscCall(SNESDestroy(&snes));
1533: PetscCall(THIDestroy(&thi));
1534: PetscCall(PetscFinalize());
1535: return 0;
1536: }
1538: /*TEST
1540: build:
1541: requires: !single
1543: test:
1544: args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc
1546: test:
1547: suffix: 2
1548: nsize: 2
1549: args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol 0
1551: test:
1552: suffix: 3
1553: nsize: 3
1554: args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current
1556: test:
1557: suffix: 4
1558: nsize: 6
1559: args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol 0
1561: test:
1562: suffix: 5
1563: nsize: 6
1564: args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}
1566: TEST*/