Actual source code: ex48.c
1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2: \n\
3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4: using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5: to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\
6: boundary conditions in the x- and y-directions.\n\
7: \n\
8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10: \n\
11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12: \n\n";
14: /*
15: The equations for horizontal velocity (u,v) are
17: - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18: - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
20: where
22: eta = B/2 (epsilon + gamma)^((p-2)/2)
24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25: written in terms of the second invariant
27: gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
29: The surface boundary conditions are the natural conditions. The basal boundary conditions
30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
34: The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the
35: map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36: the Jacobian of the coordinate transformation from a reference element to the physical element.
38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39: specially so that columns are never distributed, and are always contiguous in memory.
40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41: and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to
42: use compatible domain decomposition relative to the 3D DMDAs.
44: There are two compile-time options:
46: NO_SSE2:
47: If the host supports SSE2, we use integration code that has been vectorized with SSE2
48: intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49: 30% on my architecture (P8700, gcc-4.5 snapshot).
51: COMPUTE_LOWER_TRIANGULAR:
52: The element matrices we assemble are lower-triangular so it is not necessary to compute
53: all entries explicitly. If this macro is defined, the lower-triangular entries are
54: computed explicitly.
56: */
58: #if defined(PETSC_APPLE_FRAMEWORK)
59: #import <PETSc/petscsnes.h>
60: #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
61: #else
63: #include <petscsnes.h>
64: #include <petsc/private/dmdaimpl.h>
65: #endif
66: #include <ctype.h> /* toupper() */
68: #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI)
69: /* c++ cannot handle [_restrict_] notation like C does */
70: #undef PETSC_RESTRICT
71: #define PETSC_RESTRICT
72: #endif
74: #if defined(__SSE2__)
75: #include <emmintrin.h>
76: #endif
78: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
79: #if !defined(NO_SSE2) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_REAL_SINGLE) && !defined(PETSC_USE_REAL___FLOAT128) && !defined(PETSC_USE_REAL___FP16) && defined(__SSE2__)
80: #define USE_SSE2_KERNELS 1
81: #else
82: #define USE_SSE2_KERNELS 0
83: #endif
85: static PetscClassId THI_CLASSID;
87: typedef enum {
88: QUAD_GAUSS,
89: QUAD_LOBATTO
90: } QuadratureType;
91: static const char *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
92: PETSC_UNUSED static const PetscReal HexQWeights[8] = {1, 1, 1, 1, 1, 1, 1, 1};
93: PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
94: #define G 0.57735026918962573
95: #define H (0.5 * (1. + G))
96: #define L (0.5 * (1. - G))
97: #define M (-0.5)
98: #define P (0.5)
99: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
100: static const PetscReal HexQInterp_Lobatto[8][8] = {
101: {H, 0, 0, 0, L, 0, 0, 0},
102: {0, H, 0, 0, 0, L, 0, 0},
103: {0, 0, H, 0, 0, 0, L, 0},
104: {0, 0, 0, H, 0, 0, 0, L},
105: {L, 0, 0, 0, H, 0, 0, 0},
106: {0, L, 0, 0, 0, H, 0, 0},
107: {0, 0, L, 0, 0, 0, H, 0},
108: {0, 0, 0, L, 0, 0, 0, H}
109: };
110: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111: {{M * H, M * H, M}, {P * H, 0, 0}, {0, 0, 0}, {0, P * H, 0}, {M * L, M * L, P}, {P * L, 0, 0}, {0, 0, 0}, {0, P * L, 0} },
112: {{M * H, 0, 0}, {P * H, M * H, M}, {0, P * H, 0}, {0, 0, 0}, {M * L, 0, 0}, {P * L, M * L, P}, {0, P * L, 0}, {0, 0, 0} },
113: {{0, 0, 0}, {0, M * H, 0}, {P * H, P * H, M}, {M * H, 0, 0}, {0, 0, 0}, {0, M * L, 0}, {P * L, P * L, P}, {M * L, 0, 0} },
114: {{0, M * H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P * H, M}, {0, M * L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P * L, P}},
115: {{M * L, M * L, M}, {P * L, 0, 0}, {0, 0, 0}, {0, P * L, 0}, {M * H, M * H, P}, {P * H, 0, 0}, {0, 0, 0}, {0, P * H, 0} },
116: {{M * L, 0, 0}, {P * L, M * L, M}, {0, P * L, 0}, {0, 0, 0}, {M * H, 0, 0}, {P * H, M * H, P}, {0, P * H, 0}, {0, 0, 0} },
117: {{0, 0, 0}, {0, M * L, 0}, {P * L, P * L, M}, {M * L, 0, 0}, {0, 0, 0}, {0, M * H, 0}, {P * H, P * H, P}, {M * H, 0, 0} },
118: {{0, M * L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P * L, M}, {0, M * H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P * H, P}}
119: };
120: /* Standard Gauss */
121: static const PetscReal HexQInterp_Gauss[8][8] = {
122: {H * H * H, L * H * H, L * L * H, H * L * H, H * H * L, L * H * L, L * L * L, H * L * L},
123: {L * H * H, H * H * H, H * L * H, L * L * H, L * H * L, H * H * L, H * L * L, L * L * L},
124: {L * L * H, H * L * H, H * H * H, L * H * H, L * L * L, H * L * L, H * H * L, L * H * L},
125: {H * L * H, L * L * H, L * H * H, H * H * H, H * L * L, L * L * L, L * H * L, H * H * L},
126: {H * H * L, L * H * L, L * L * L, H * L * L, H * H * H, L * H * H, L * L * H, H * L * H},
127: {L * H * L, H * H * L, H * L * L, L * L * L, L * H * H, H * H * H, H * L * H, L * L * H},
128: {L * L * L, H * L * L, H * H * L, L * H * L, L * L * H, H * L * H, H * H * H, L * H * H},
129: {H * L * L, L * L * L, L * H * L, H * H * L, H * L * H, L * L * H, L * H * H, H * H * H}
130: };
131: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
132: {{M * H * H, H * M * H, H * H * M}, {P * H * H, L * M * H, L * H * M}, {P * L * H, L * P * H, L * L * M}, {M * L * H, H * P * H, H * L * M}, {M * H * L, H * M * L, H * H * P}, {P * H * L, L * M * L, L * H * P}, {P * L * L, L * P * L, L * L * P}, {M * L * L, H * P * L, H * L * P}},
133: {{M * H * H, L * M * H, L * H * M}, {P * H * H, H * M * H, H * H * M}, {P * L * H, H * P * H, H * L * M}, {M * L * H, L * P * H, L * L * M}, {M * H * L, L * M * L, L * H * P}, {P * H * L, H * M * L, H * H * P}, {P * L * L, H * P * L, H * L * P}, {M * L * L, L * P * L, L * L * P}},
134: {{M * L * H, L * M * H, L * L * M}, {P * L * H, H * M * H, H * L * M}, {P * H * H, H * P * H, H * H * M}, {M * H * H, L * P * H, L * H * M}, {M * L * L, L * M * L, L * L * P}, {P * L * L, H * M * L, H * L * P}, {P * H * L, H * P * L, H * H * P}, {M * H * L, L * P * L, L * H * P}},
135: {{M * L * H, H * M * H, H * L * M}, {P * L * H, L * M * H, L * L * M}, {P * H * H, L * P * H, L * H * M}, {M * H * H, H * P * H, H * H * M}, {M * L * L, H * M * L, H * L * P}, {P * L * L, L * M * L, L * L * P}, {P * H * L, L * P * L, L * H * P}, {M * H * L, H * P * L, H * H * P}},
136: {{M * H * L, H * M * L, H * H * M}, {P * H * L, L * M * L, L * H * M}, {P * L * L, L * P * L, L * L * M}, {M * L * L, H * P * L, H * L * M}, {M * H * H, H * M * H, H * H * P}, {P * H * H, L * M * H, L * H * P}, {P * L * H, L * P * H, L * L * P}, {M * L * H, H * P * H, H * L * P}},
137: {{M * H * L, L * M * L, L * H * M}, {P * H * L, H * M * L, H * H * M}, {P * L * L, H * P * L, H * L * M}, {M * L * L, L * P * L, L * L * M}, {M * H * H, L * M * H, L * H * P}, {P * H * H, H * M * H, H * H * P}, {P * L * H, H * P * H, H * L * P}, {M * L * H, L * P * H, L * L * P}},
138: {{M * L * L, L * M * L, L * L * M}, {P * L * L, H * M * L, H * L * M}, {P * H * L, H * P * L, H * H * M}, {M * H * L, L * P * L, L * H * M}, {M * L * H, L * M * H, L * L * P}, {P * L * H, H * M * H, H * L * P}, {P * H * H, H * P * H, H * H * P}, {M * H * H, L * P * H, L * H * P}},
139: {{M * L * L, H * M * L, H * L * M}, {P * L * L, L * M * L, L * L * M}, {P * H * L, L * P * L, L * H * M}, {M * H * L, H * P * L, H * H * M}, {M * L * H, H * M * H, H * L * P}, {P * L * H, L * M * H, L * L * P}, {P * H * H, L * P * H, L * H * P}, {M * H * H, H * P * H, H * H * P}}
140: };
141: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
142: /* Standard 2x2 Gauss quadrature for the bottom layer. */
143: static const PetscReal QuadQInterp[4][4] = {
144: {H * H, L * H, L * L, H * L},
145: {L * H, H * H, H * L, L * L},
146: {L * L, H * L, H * H, L * H},
147: {H * L, L * L, L * H, H * H}
148: };
149: static const PetscReal QuadQDeriv[4][4][2] = {
150: {{M * H, M * H}, {P * H, M * L}, {P * L, P * L}, {M * L, P * H}},
151: {{M * H, M * L}, {P * H, M * H}, {P * L, P * H}, {M * L, P * L}},
152: {{M * L, M * L}, {P * L, M * H}, {P * H, P * H}, {M * H, P * L}},
153: {{M * L, M * H}, {P * L, M * L}, {P * H, P * L}, {M * H, P * H}}
154: };
155: #undef G
156: #undef H
157: #undef L
158: #undef M
159: #undef P
161: #define HexExtract(x, i, j, k, n) \
162: do { \
163: (n)[0] = (x)[i][j][k]; \
164: (n)[1] = (x)[i + 1][j][k]; \
165: (n)[2] = (x)[i + 1][j + 1][k]; \
166: (n)[3] = (x)[i][j + 1][k]; \
167: (n)[4] = (x)[i][j][k + 1]; \
168: (n)[5] = (x)[i + 1][j][k + 1]; \
169: (n)[6] = (x)[i + 1][j + 1][k + 1]; \
170: (n)[7] = (x)[i][j + 1][k + 1]; \
171: } while (0)
173: #define HexExtractRef(x, i, j, k, n) \
174: do { \
175: (n)[0] = &(x)[i][j][k]; \
176: (n)[1] = &(x)[i + 1][j][k]; \
177: (n)[2] = &(x)[i + 1][j + 1][k]; \
178: (n)[3] = &(x)[i][j + 1][k]; \
179: (n)[4] = &(x)[i][j][k + 1]; \
180: (n)[5] = &(x)[i + 1][j][k + 1]; \
181: (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
182: (n)[7] = &(x)[i][j + 1][k + 1]; \
183: } while (0)
185: #define QuadExtract(x, i, j, n) \
186: do { \
187: (n)[0] = (x)[i][j]; \
188: (n)[1] = (x)[i + 1][j]; \
189: (n)[2] = (x)[i + 1][j + 1]; \
190: (n)[3] = (x)[i][j + 1]; \
191: } while (0)
193: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
194: {
195: PetscInt i;
196: dz[0] = dz[1] = dz[2] = 0;
197: for (i = 0; i < 8; i++) {
198: dz[0] += dphi[i][0] * zn[i];
199: dz[1] += dphi[i][1] * zn[i];
200: dz[2] += dphi[i][2] * zn[i];
201: }
202: }
204: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw)
205: {
206: const PetscReal jac[3][3] = {
207: {hx / 2, 0, 0 },
208: {0, hy / 2, 0 },
209: {dz[0], dz[1], dz[2]}
210: };
211: const PetscReal ijac[3][3] = {
212: {1 / jac[0][0], 0, 0 },
213: {0, 1 / jac[1][1], 0 },
214: {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}
215: };
216: const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2];
217: PetscInt i;
219: for (i = 0; i < 8; i++) {
220: const PetscReal *dphir = HexQDeriv[q][i];
221: phi[i] = HexQInterp[q][i];
222: dphi[i][0] = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
223: dphi[i][1] = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
224: dphi[i][2] = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
225: }
226: *jw = 1.0 * jdet;
227: }
229: typedef struct _p_THI *THI;
230: typedef struct _n_Units *Units;
232: typedef struct {
233: PetscScalar u, v;
234: } Node;
236: typedef struct {
237: PetscScalar b; /* bed */
238: PetscScalar h; /* thickness */
239: PetscScalar beta2; /* friction */
240: } PrmNode;
242: typedef struct {
243: PetscReal min, max, cmin, cmax;
244: } PRange;
246: typedef enum {
247: THIASSEMBLY_TRIDIAGONAL,
248: THIASSEMBLY_FULL
249: } THIAssemblyMode;
251: struct _p_THI {
252: PETSCHEADER(int);
253: void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
254: PetscInt zlevels;
255: PetscReal Lx, Ly, Lz; /* Model domain */
256: PetscReal alpha; /* Bed angle */
257: Units units;
258: PetscReal dirichlet_scale;
259: PetscReal ssa_friction_scale;
260: PRange eta;
261: PRange beta2;
262: struct {
263: PetscReal Bd2, eps, exponent;
264: } viscosity;
265: struct {
266: PetscReal irefgam, eps2, exponent, refvel, epsvel;
267: } friction;
268: PetscReal rhog;
269: PetscBool no_slip;
270: PetscBool tridiagonal;
271: PetscBool coarse2d;
272: PetscBool verbose;
273: MatType mattype;
274: };
276: struct _n_Units {
277: /* fundamental */
278: PetscReal meter;
279: PetscReal kilogram;
280: PetscReal second;
281: /* derived */
282: PetscReal Pascal;
283: PetscReal year;
284: };
286: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI);
287: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI);
288: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI);
290: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
291: {
292: const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1, pn[1].b + pn[1].h * (PetscScalar)k / zm1, pn[2].b + pn[2].h * (PetscScalar)k / zm1, pn[3].b + pn[3].h * (PetscScalar)k / zm1,
293: pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
294: for (PetscInt i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
295: }
297: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
298: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
299: {
300: Units units = thi->units;
301: PetscReal s = -x * PetscSinReal(thi->alpha);
303: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
304: p->h = s - p->b;
305: p->beta2 = 1e30;
306: }
308: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
309: {
310: Units units = thi->units;
311: PetscReal s = -x * PetscSinReal(thi->alpha);
313: p->b = s - 1000 * units->meter;
314: p->h = s - p->b;
315: /* tau_b = beta2 v is a stress (Pa) */
316: p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter;
317: }
319: /* These are just toys */
321: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
322: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
323: {
324: Units units = thi->units;
325: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
326: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
327: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
328: p->h = s - p->b;
329: p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
330: }
332: /* Like Z, but with 200 meter cliffs */
333: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
334: {
335: Units units = thi->units;
336: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
337: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
339: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
340: if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
341: p->h = s - p->b;
342: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
343: }
345: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
346: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
347: {
348: Units units = thi->units;
349: PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
350: PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
352: p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
353: p->h = s - p->b;
354: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
355: }
357: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
358: {
359: if (thi->friction.irefgam == 0) {
360: Units units = thi->units;
361: thi->friction.irefgam = 1. / (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year));
362: thi->friction.eps2 = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
363: }
364: if (thi->friction.exponent == 0) {
365: *beta2 = rbeta2;
366: *dbeta2 = 0;
367: } else {
368: *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
369: *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
370: }
371: }
373: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
374: {
375: PetscReal Bd2, eps, exponent;
376: if (thi->viscosity.Bd2 == 0) {
377: Units units = thi->units;
378: const PetscReal n = 3., /* Glen exponent */
379: p = 1. + 1. / n, /* for Stokes */
380: A = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
381: B = PetscPowReal(A, -1. / n); /* hardness parameter */
382: thi->viscosity.Bd2 = B / 2;
383: thi->viscosity.exponent = (p - 2) / 2;
384: thi->viscosity.eps = 0.5 * PetscSqr(1e-5 / units->year);
385: }
386: Bd2 = thi->viscosity.Bd2;
387: exponent = thi->viscosity.exponent;
388: eps = thi->viscosity.eps;
389: *eta = Bd2 * PetscPowReal(eps + gam, exponent);
390: *deta = exponent * (*eta) / (eps + gam);
391: }
393: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
394: {
395: if (x < *min) *min = x;
396: if (x > *max) *max = x;
397: }
399: static void PRangeClear(PRange *p)
400: {
401: p->cmin = p->min = 1e100;
402: p->cmax = p->max = -1e100;
403: }
405: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
406: {
407: PetscFunctionBeginUser;
408: p->cmin = min;
409: p->cmax = max;
410: if (min < p->min) p->min = min;
411: if (max > p->max) p->max = max;
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
415: static PetscErrorCode THIDestroy(THI *thi)
416: {
417: PetscFunctionBeginUser;
418: if (!*thi) PetscFunctionReturn(PETSC_SUCCESS);
419: if (--((PetscObject)*thi)->refct > 0) {
420: *thi = 0;
421: PetscFunctionReturn(PETSC_SUCCESS);
422: }
423: PetscCall(PetscFree((*thi)->units));
424: PetscCall(PetscFree((*thi)->mattype));
425: PetscCall(PetscHeaderDestroy(thi));
426: PetscFunctionReturn(PETSC_SUCCESS);
427: }
429: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
430: {
431: static PetscBool registered = PETSC_FALSE;
432: THI thi;
433: Units units;
435: PetscFunctionBeginUser;
436: *inthi = 0;
437: if (!registered) {
438: PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
439: registered = PETSC_TRUE;
440: }
441: PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0));
443: PetscCall(PetscNew(&thi->units));
444: units = thi->units;
445: units->meter = 1e-2;
446: units->second = 1e-7;
447: units->kilogram = 1e-12;
449: PetscOptionsBegin(comm, NULL, "Scaled units options", "");
450: {
451: PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
452: PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
453: PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
454: }
455: PetscOptionsEnd();
456: units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
457: units->year = 31556926. * units->second; /* seconds per year */
459: thi->Lx = 10.e3;
460: thi->Ly = 10.e3;
461: thi->Lz = 1000;
462: thi->dirichlet_scale = 1;
463: thi->verbose = PETSC_FALSE;
465: PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
466: {
467: QuadratureType quad = QUAD_GAUSS;
468: char homexp[] = "A";
469: char mtype[256] = MATSBAIJ;
470: PetscReal L, m = 1.0;
471: PetscBool flg;
472: L = thi->Lx;
473: PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
474: if (flg) thi->Lx = thi->Ly = L;
475: PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
476: PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
477: PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
478: PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
479: switch (homexp[0] = (char)toupper(homexp[0])) {
480: case 'A':
481: thi->initialize = THIInitialize_HOM_A;
482: thi->no_slip = PETSC_TRUE;
483: thi->alpha = 0.5;
484: break;
485: case 'C':
486: thi->initialize = THIInitialize_HOM_C;
487: thi->no_slip = PETSC_FALSE;
488: thi->alpha = 0.1;
489: break;
490: case 'X':
491: thi->initialize = THIInitialize_HOM_X;
492: thi->no_slip = PETSC_FALSE;
493: thi->alpha = 0.3;
494: break;
495: case 'Y':
496: thi->initialize = THIInitialize_HOM_Y;
497: thi->no_slip = PETSC_FALSE;
498: thi->alpha = 0.5;
499: break;
500: case 'Z':
501: thi->initialize = THIInitialize_HOM_Z;
502: thi->no_slip = PETSC_FALSE;
503: thi->alpha = 0.5;
504: break;
505: default:
506: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
507: }
508: PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
509: switch (quad) {
510: case QUAD_GAUSS:
511: HexQInterp = HexQInterp_Gauss;
512: HexQDeriv = HexQDeriv_Gauss;
513: break;
514: case QUAD_LOBATTO:
515: HexQInterp = HexQInterp_Lobatto;
516: HexQDeriv = HexQDeriv_Lobatto;
517: break;
518: }
519: PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
521: thi->friction.refvel = 100.;
522: thi->friction.epsvel = 1.;
524: PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL));
525: PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL));
526: PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
528: thi->friction.exponent = (m - 1) / 2;
530: PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
531: PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
532: PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL));
533: PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL));
534: PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
535: PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype));
536: PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
537: }
538: PetscOptionsEnd();
540: /* dimensionalize */
541: thi->Lx *= units->meter;
542: thi->Ly *= units->meter;
543: thi->Lz *= units->meter;
544: thi->alpha *= PETSC_PI / 180;
546: PRangeClear(&thi->eta);
547: PRangeClear(&thi->beta2);
549: {
550: PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
551: driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
552: THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
553: thi->rhog = rho * grav;
554: if (thi->verbose) {
555: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
556: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
557: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
558: THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
559: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
560: }
561: }
563: *inthi = thi;
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm)
568: {
569: PrmNode **p;
570: PetscInt i, j, xs, xm, ys, ym, mx, my;
572: PetscFunctionBeginUser;
573: PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
574: PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
575: PetscCall(DMDAVecGetArray(da2prm, prm, &p));
576: for (i = xs; i < xs + xm; i++) {
577: for (j = ys; j < ys + ym; j++) {
578: PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
579: thi->initialize(thi, xx, yy, &p[i][j]);
580: }
581: }
582: PetscCall(DMDAVecRestoreArray(da2prm, prm, &p));
583: PetscFunctionReturn(PETSC_SUCCESS);
584: }
586: static PetscErrorCode THISetUpDM(THI thi, DM dm)
587: {
588: PetscInt refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s;
589: DMDAStencilType st;
590: DM da2prm;
591: Vec X;
593: PetscFunctionBeginUser;
594: PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
595: if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st));
596: PetscCall(DMGetRefineLevel(dm, &refinelevel));
597: PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel));
598: level = refinelevel - coarsenlevel;
599: PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm));
600: PetscCall(DMSetUp(da2prm));
601: PetscCall(DMCreateLocalVector(da2prm, &X));
602: {
603: PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
604: if (dim == 2) {
605: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My)));
606: } else {
607: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000. / (Mz - 1))));
608: }
609: }
610: PetscCall(THIInitializePrm(thi, da2prm, X));
611: if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
612: PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
613: }
614: if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_2D, thi));
615: PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm));
616: PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X));
617: PetscCall(DMDestroy(&da2prm));
618: PetscCall(VecDestroy(&X));
619: PetscFunctionReturn(PETSC_SUCCESS);
620: }
622: static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, PetscCtx ctx)
623: {
624: THI thi = (THI)ctx;
625: PetscInt rlevel, clevel;
627: PetscFunctionBeginUser;
628: PetscCall(THISetUpDM(thi, dmc));
629: PetscCall(DMGetRefineLevel(dmc, &rlevel));
630: PetscCall(DMGetCoarsenLevel(dmc, &clevel));
631: if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ));
632: PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, PetscCtx ctx)
637: {
638: THI thi = (THI)ctx;
640: PetscFunctionBeginUser;
641: PetscCall(THISetUpDM(thi, dmf));
642: PetscCall(DMSetMatType(dmf, thi->mattype));
643: PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi));
644: /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
645: PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi));
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
649: static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm)
650: {
651: DM da2prm;
652: Vec X;
654: PetscFunctionBeginUser;
655: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
656: PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
657: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
658: PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
659: PetscCall(DMDAVecGetArray(da2prm, X, prm));
660: PetscFunctionReturn(PETSC_SUCCESS);
661: }
663: static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm)
664: {
665: DM da2prm;
666: Vec X;
668: PetscFunctionBeginUser;
669: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
670: PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
671: PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
672: PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
673: PetscCall(DMDAVecRestoreArray(da2prm, X, prm));
674: PetscFunctionReturn(PETSC_SUCCESS);
675: }
677: static PetscErrorCode THIInitial(SNES snes, Vec X, PetscCtx ctx)
678: {
679: THI thi;
680: PetscInt i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
681: PetscReal hx, hy;
682: PrmNode **prm;
683: Node ***x;
684: DM da;
686: PetscFunctionBeginUser;
687: PetscCall(SNESGetDM(snes, &da));
688: PetscCall(DMGetApplicationContext(da, &thi));
689: PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
690: PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
691: PetscCall(DMDAVecGetArray(da, X, &x));
692: PetscCall(THIDAGetPrm(da, &prm));
693: hx = thi->Lx / mx;
694: hy = thi->Ly / my;
695: for (i = xs; i < xs + xm; i++) {
696: for (j = ys; j < ys + ym; j++) {
697: for (k = zs; k < zs + zm; k++) {
698: const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
699: x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
700: x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
701: }
702: }
703: }
704: PetscCall(DMDAVecRestoreArray(da, X, &x));
705: PetscCall(THIDARestorePrm(da, &prm));
706: PetscFunctionReturn(PETSC_SUCCESS);
707: }
709: static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta)
710: {
711: PetscInt l, ll;
712: PetscScalar gam;
714: du[0] = du[1] = du[2] = 0;
715: dv[0] = dv[1] = dv[2] = 0;
716: *u = 0;
717: *v = 0;
718: for (l = 0; l < 8; l++) {
719: *u += phi[l] * n[l].u;
720: *v += phi[l] * n[l].v;
721: for (ll = 0; ll < 3; ll++) {
722: du[ll] += dphi[l][ll] * n[l].u;
723: dv[ll] += dphi[l][ll] * n[l].v;
724: }
725: }
726: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]);
727: THIViscosity(thi, PetscRealPart(gam), eta, deta);
728: }
730: static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta)
731: {
732: PetscInt l, ll;
733: PetscScalar gam;
735: du[0] = du[1] = 0;
736: dv[0] = dv[1] = 0;
737: *u = 0;
738: *v = 0;
739: for (l = 0; l < 4; l++) {
740: *u += phi[l] * n[l].u;
741: *v += phi[l] * n[l].v;
742: for (ll = 0; ll < 2; ll++) {
743: du[ll] += dphi[l][ll] * n[l].u;
744: dv[ll] += dphi[l][ll] * n[l].v;
745: }
746: }
747: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]);
748: THIViscosity(thi, PetscRealPart(gam), eta, deta);
749: }
751: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi)
752: {
753: PetscInt xs, ys, xm, ym, zm, i, j, k, q, l;
754: PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
755: PrmNode **prm;
757: PetscFunctionBeginUser;
758: xs = info->zs;
759: ys = info->ys;
760: xm = info->zm;
761: ym = info->ym;
762: zm = info->xm;
763: hx = thi->Lx / info->mz;
764: hy = thi->Ly / info->my;
766: etamin = 1e100;
767: etamax = 0;
768: beta2min = 1e100;
769: beta2max = 0;
771: PetscCall(THIDAGetPrm(info->da, &prm));
773: for (i = xs; i < xs + xm; i++) {
774: for (j = ys; j < ys + ym; j++) {
775: PrmNode pn[4];
776: QuadExtract(prm, i, j, pn);
777: for (k = 0; k < zm - 1; k++) {
778: PetscInt ls = 0;
779: Node n[8], *fn[8];
780: PetscReal zn[8], etabase = 0;
781: PrmHexGetZ(pn, k, zm, zn);
782: HexExtract(x, i, j, k, n);
783: HexExtractRef(f, i, j, k, fn);
784: if (thi->no_slip && k == 0) {
785: for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
786: /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
787: ls = 4;
788: }
789: for (q = 0; q < 8; q++) {
790: PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta;
791: PetscScalar du[3], dv[3], u, v;
792: HexGrad(HexQDeriv[q], zn, dz);
793: HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
794: PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
795: jw /= thi->rhog; /* scales residuals to be O(1) */
796: if (q == 0) etabase = eta;
797: RangeUpdate(&etamin, &etamax, eta);
798: for (l = ls; l < 8; l++) { /* test functions */
799: const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0};
800: const PetscReal pp = phi[l], *dp = dphi[l];
801: fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
802: fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
803: }
804: }
805: if (k == 0) { /* we are on a bottom face */
806: if (thi->no_slip) {
807: /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
808: * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
809: * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
810: * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
811: * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
812: * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
813: * assembled matrix (see the similar block in THIJacobianLocal).
814: *
815: * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
816: * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make
817: * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part,
818: * so the solution will exactly satisfy the boundary condition after the first linear iteration.
819: */
820: const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1.);
821: const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
822: fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
823: fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
824: } else { /* Integrate over bottom face to apply boundary condition */
825: for (q = 0; q < 4; q++) {
826: const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
827: PetscScalar u = 0, v = 0, rbeta2 = 0;
828: PetscReal beta2, dbeta2;
829: for (l = 0; l < 4; l++) {
830: u += phi[l] * n[l].u;
831: v += phi[l] * n[l].v;
832: rbeta2 += phi[l] * pn[l].beta2;
833: }
834: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
835: RangeUpdate(&beta2min, &beta2max, beta2);
836: for (l = 0; l < 4; l++) {
837: const PetscReal pp = phi[l];
838: fn[ls + l]->u += pp * jw * beta2 * u;
839: fn[ls + l]->v += pp * jw * beta2 * v;
840: }
841: }
842: }
843: }
844: }
845: }
846: }
848: PetscCall(THIDARestorePrm(info->da, &prm));
850: PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
851: PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
852: PetscFunctionReturn(PETSC_SUCCESS);
853: }
855: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
856: {
857: PetscReal nrm;
858: PetscInt m;
859: PetscMPIInt rank;
861: PetscFunctionBeginUser;
862: PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
863: PetscCall(MatGetSize(B, &m, 0));
864: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
865: if (rank == 0) {
866: PetscScalar val0, val2;
867: PetscCall(MatGetValue(B, 0, 0, &val0));
868: PetscCall(MatGetValue(B, 2, 2, &val2));
869: PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2),
870: (double)thi->eta.cmin, (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
871: }
872: PetscFunctionReturn(PETSC_SUCCESS);
873: }
875: static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
876: {
877: Node ***x;
878: PetscInt i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
879: PetscReal umin = 1e100, umax = -1e100;
880: PetscScalar usum = 0.0, gusum;
882: PetscFunctionBeginUser;
883: *min = *max = *mean = 0;
884: PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
885: PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
886: PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition");
887: PetscCall(DMDAVecGetArray(da, X, &x));
888: for (i = xs; i < xs + xm; i++) {
889: for (j = ys; j < ys + ym; j++) {
890: PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
891: RangeUpdate(&umin, &umax, u);
892: usum += u;
893: }
894: }
895: PetscCall(DMDAVecRestoreArray(da, X, &x));
896: PetscCallMPI(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da)));
897: PetscCallMPI(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
898: PetscCallMPI(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da)));
899: *mean = PetscRealPart(gusum) / (mx * my);
900: PetscFunctionReturn(PETSC_SUCCESS);
901: }
903: static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[])
904: {
905: MPI_Comm comm;
906: Vec X;
907: DM dm;
909: PetscFunctionBeginUser;
910: PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
911: PetscCall(SNESGetSolution(snes, &X));
912: PetscCall(SNESGetDM(snes, &dm));
913: PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
914: {
915: PetscInt its, lits;
916: SNESConvergedReason reason;
917: PetscCall(SNESGetIterationNumber(snes, &its));
918: PetscCall(SNESGetConvergedReason(snes, &reason));
919: PetscCall(SNESGetLinearSolveIterations(snes, &lits));
920: PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
921: }
922: {
923: PetscReal nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
924: PetscInt i, j, m;
925: const PetscScalar *x;
926: PetscCall(VecNorm(X, NORM_2, &nrm2));
927: PetscCall(VecGetLocalSize(X, &m));
928: PetscCall(VecGetArrayRead(X, &x));
929: for (i = 0; i < m; i += 2) {
930: PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
931: tmin[0] = PetscMin(u, tmin[0]);
932: tmin[1] = PetscMin(v, tmin[1]);
933: tmin[2] = PetscMin(c, tmin[2]);
934: tmax[0] = PetscMax(u, tmax[0]);
935: tmax[1] = PetscMax(v, tmax[1]);
936: tmax[2] = PetscMax(c, tmax[2]);
937: }
938: PetscCall(VecRestoreArrayRead(X, &x));
939: PetscCallMPI(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
940: PetscCallMPI(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
941: /* Dimensionalize to meters/year */
942: nrm2 *= thi->units->year / thi->units->meter;
943: for (j = 0; j < 3; j++) {
944: min[j] *= thi->units->year / thi->units->meter;
945: max[j] *= thi->units->year / thi->units->meter;
946: }
947: if (min[0] == 0.0) min[0] = 0.0;
948: PetscCall(PetscPrintf(comm, "|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
949: {
950: PetscReal umin, umax, umean;
951: PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean));
952: umin *= thi->units->year / thi->units->meter;
953: umax *= thi->units->year / thi->units->meter;
954: umean *= thi->units->year / thi->units->meter;
955: PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
956: }
957: /* These values stay nondimensional */
958: PetscCall(PetscPrintf(comm, "Global eta range %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
959: PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
960: }
961: PetscFunctionReturn(PETSC_SUCCESS);
962: }
964: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi)
965: {
966: PetscInt xs, ys, xm, ym, i, j, q, l, ll;
967: PetscReal hx, hy;
968: PrmNode **prm;
970: PetscFunctionBeginUser;
971: xs = info->ys;
972: ys = info->xs;
973: xm = info->ym;
974: ym = info->xm;
975: hx = thi->Lx / info->my;
976: hy = thi->Ly / info->mx;
978: PetscCall(MatZeroEntries(B));
979: PetscCall(THIDAGetPrm(info->da, &prm));
981: for (i = xs; i < xs + xm; i++) {
982: for (j = ys; j < ys + ym; j++) {
983: Node n[4];
984: PrmNode pn[4];
985: PetscScalar Ke[4 * 2][4 * 2];
986: QuadExtract(prm, i, j, pn);
987: QuadExtract(x, i, j, n);
988: PetscCall(PetscMemzero(Ke, sizeof(Ke)));
989: for (q = 0; q < 4; q++) {
990: PetscReal phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2;
991: PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0;
992: for (l = 0; l < 4; l++) {
993: phi[l] = QuadQInterp[q][l];
994: dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx;
995: dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy;
996: h += phi[l] * pn[l].h;
997: rbeta2 += phi[l] * pn[l].beta2;
998: }
999: jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */
1000: PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1001: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1002: for (l = 0; l < 4; l++) {
1003: const PetscReal pp = phi[l], *dp = dphi[l];
1004: for (ll = 0; ll < 4; ll++) {
1005: const PetscReal ppl = phi[ll], *dpl = dphi[ll];
1006: PetscScalar dgdu, dgdv;
1007: dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1];
1008: dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0];
1009: /* Picard part */
1010: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1011: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1012: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1013: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1014: /* extra Newton terms */
1015: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale;
1016: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale;
1017: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale;
1018: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale;
1019: }
1020: }
1021: }
1022: {
1023: const MatStencil rc[4] = {
1024: {0, i, j, 0},
1025: {0, i + 1, j, 0},
1026: {0, i + 1, j + 1, 0},
1027: {0, i, j + 1, 0}
1028: };
1029: PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES));
1030: }
1031: }
1032: }
1033: PetscCall(THIDARestorePrm(info->da, &prm));
1035: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1036: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1037: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1038: if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1039: PetscFunctionReturn(PETSC_SUCCESS);
1040: }
1042: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode)
1043: {
1044: PetscInt xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1045: PetscReal hx, hy;
1046: PrmNode **prm;
1048: PetscFunctionBeginUser;
1049: xs = info->zs;
1050: ys = info->ys;
1051: xm = info->zm;
1052: ym = info->ym;
1053: zm = info->xm;
1054: hx = thi->Lx / info->mz;
1055: hy = thi->Ly / info->my;
1057: PetscCall(MatZeroEntries(B));
1058: PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
1059: PetscCall(THIDAGetPrm(info->da, &prm));
1061: for (i = xs; i < xs + xm; i++) {
1062: for (j = ys; j < ys + ym; j++) {
1063: PrmNode pn[4];
1064: QuadExtract(prm, i, j, pn);
1065: for (k = 0; k < zm - 1; k++) {
1066: Node n[8];
1067: PetscReal zn[8], etabase = 0;
1068: PetscScalar Ke[8 * 2][8 * 2];
1069: PetscInt ls = 0;
1071: PrmHexGetZ(pn, k, zm, zn);
1072: HexExtract(x, i, j, k, n);
1073: PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1074: if (thi->no_slip && k == 0) {
1075: for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1076: ls = 4;
1077: }
1078: for (q = 0; q < 8; q++) {
1079: PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta;
1080: PetscScalar du[3], dv[3], u, v;
1081: HexGrad(HexQDeriv[q], zn, dz);
1082: HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1083: PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1084: jw /= thi->rhog; /* residuals are scaled by this factor */
1085: if (q == 0) etabase = eta;
1086: for (l = ls; l < 8; l++) { /* test functions */
1087: const PetscReal *PETSC_RESTRICT dp = dphi[l];
1088: #if USE_SSE2_KERNELS
1089: /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1090: __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta), p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */
1091: p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */
1092: pdu2dv2 = _mm_unpacklo_pd(du2, dv2), /* [du2, dv2] */
1093: du1pdv0 = _mm_add_pd(du1, dv0), /* du1 + dv0 */
1094: t1 = _mm_mul_pd(dp0, p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1095: t2 = _mm_mul_pd(dp1, p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1097: #endif
1098: #if defined(COMPUTE_LOWER_TRIANGULAR) /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1099: for (ll = ls; ll < 8; ll++) { /* trial functions */
1100: #else
1101: for (ll = l; ll < 8; ll++) {
1102: #endif
1103: const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1104: if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */
1105: #if !USE_SSE2_KERNELS
1106: /* The analytic Jacobian in nice, easy-to-read form */
1107: {
1108: PetscScalar dgdu, dgdv;
1109: dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1110: dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1111: /* Picard part */
1112: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1113: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1114: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1115: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1116: /* extra Newton terms */
1117: Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1118: Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1119: Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1120: Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1121: }
1122: #else
1123: /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1124: * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1125: * by 25 to 30 percent. */
1126: {
1127: __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv;
1128: keu = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2))));
1129: kev = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1))))));
1130: pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1131: t0 = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */
1132: t3 = _mm_mul_pd(t0, du1pdv0); /* t0 (du1 + dv0) */
1133: _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2))))));
1134: _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2))))));
1135: }
1136: #endif
1137: }
1138: }
1139: }
1140: if (k == 0) { /* on a bottom face */
1141: if (thi->no_slip) {
1142: const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1);
1143: const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1144: Ke[0][0] = thi->dirichlet_scale * diagu;
1145: Ke[1][1] = thi->dirichlet_scale * diagv;
1146: } else {
1147: for (q = 0; q < 4; q++) {
1148: const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
1149: PetscScalar u = 0, v = 0, rbeta2 = 0;
1150: PetscReal beta2, dbeta2;
1151: for (l = 0; l < 4; l++) {
1152: u += phi[l] * n[l].u;
1153: v += phi[l] * n[l].v;
1154: rbeta2 += phi[l] * pn[l].beta2;
1155: }
1156: THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1157: for (l = 0; l < 4; l++) {
1158: const PetscReal pp = phi[l];
1159: for (ll = 0; ll < 4; ll++) {
1160: const PetscReal ppl = phi[ll];
1161: Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1162: Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1163: Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1164: Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1165: }
1166: }
1167: }
1168: }
1169: }
1170: {
1171: const MatStencil rc[8] = {
1172: {i, j, k, 0},
1173: {i + 1, j, k, 0},
1174: {i + 1, j + 1, k, 0},
1175: {i, j + 1, k, 0},
1176: {i, j, k + 1, 0},
1177: {i + 1, j, k + 1, 0},
1178: {i + 1, j + 1, k + 1, 0},
1179: {i, j + 1, k + 1, 0}
1180: };
1181: if (amode == THIASSEMBLY_TRIDIAGONAL) {
1182: for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1183: const PetscInt l4 = l + 4;
1184: const MatStencil rcl[2] = {
1185: {rc[l].k, rc[l].j, rc[l].i, 0},
1186: {rc[l4].k, rc[l4].j, rc[l4].i, 0}
1187: };
1188: #if defined(COMPUTE_LOWER_TRIANGULAR)
1189: const PetscScalar Kel[4][4] = {
1190: {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] },
1191: {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] },
1192: {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1193: {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1194: };
1195: #else
1196: /* Same as above except for the lower-left block */
1197: const PetscScalar Kel[4][4] = {
1198: {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] },
1199: {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] },
1200: {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1201: {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1202: };
1203: #endif
1204: PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES));
1205: }
1206: } else {
1207: #if !defined(COMPUTE_LOWER_TRIANGULAR) /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1208: for (l = 0; l < 8; l++) {
1209: for (ll = l + 1; ll < 8; ll++) {
1210: Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1211: Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1212: Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1213: Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1214: }
1215: }
1216: #endif
1217: PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES));
1218: }
1219: }
1220: }
1221: }
1222: }
1223: PetscCall(THIDARestorePrm(info->da, &prm));
1225: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1226: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1227: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1228: if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1229: PetscFunctionReturn(PETSC_SUCCESS);
1230: }
1232: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1233: {
1234: PetscFunctionBeginUser;
1235: PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL));
1236: PetscFunctionReturn(PETSC_SUCCESS);
1237: }
1239: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1240: {
1241: PetscFunctionBeginUser;
1242: PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL));
1243: PetscFunctionReturn(PETSC_SUCCESS);
1244: }
1246: static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[])
1247: {
1248: THI thi;
1249: PetscInt dim, M, N, m, n, s, dof;
1250: DM dac, daf;
1251: DMDAStencilType st;
1252: DM_DA *ddf, *ddc;
1254: PetscFunctionBeginUser;
1255: PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi));
1256: PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance");
1257: if (nlevels > 1) {
1258: PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy));
1259: dac = hierarchy[nlevels - 2];
1260: } else {
1261: dac = dac0;
1262: }
1263: PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st));
1264: PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs");
1266: /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1267: PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf));
1268: PetscCall(DMSetUp(daf));
1270: daf->ops->creatematrix = dac->ops->creatematrix;
1271: daf->ops->createinterpolation = dac->ops->createinterpolation;
1272: daf->ops->getcoloring = dac->ops->getcoloring;
1273: ddf = (DM_DA *)daf->data;
1274: ddc = (DM_DA *)dac->data;
1275: ddf->interptype = ddc->interptype;
1277: PetscCall(DMDASetFieldName(daf, 0, "x-velocity"));
1278: PetscCall(DMDASetFieldName(daf, 1, "y-velocity"));
1280: hierarchy[nlevels - 1] = daf;
1281: PetscFunctionReturn(PETSC_SUCCESS);
1282: }
1284: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale)
1285: {
1286: PetscInt dim;
1288: PetscFunctionBeginUser;
1291: PetscAssertPointer(A, 3);
1292: if (scale) PetscAssertPointer(scale, 4);
1293: PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1294: if (dim == 2) {
1295: /* We are in the 2D problem and use normal DMDA interpolation */
1296: PetscCall(DMCreateInterpolation(dac, daf, A, scale));
1297: } else {
1298: PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart;
1299: Mat B;
1301: PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1302: PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm));
1303: PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected");
1304: PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B));
1305: PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my));
1307: PetscCall(MatSetType(B, MATAIJ));
1308: PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL));
1309: PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL));
1310: PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1311: PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL));
1312: for (i = xs; i < xs + xm; i++) {
1313: for (j = ys; j < ys + ym; j++) {
1314: for (k = zs; k < zs + zm; k++) {
1315: PetscInt i2 = i * ym + j, i3 = i2 * zm + k;
1316: PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) / (mz - 1.); /* Integration using trapezoid rule */
1317: PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES));
1318: }
1319: }
1320: }
1321: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1322: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1323: PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A));
1324: PetscCall(MatDestroy(&B));
1325: }
1326: PetscFunctionReturn(PETSC_SUCCESS);
1327: }
1329: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J)
1330: {
1331: Mat A;
1332: PetscInt xm, ym, zm, dim, dof = 2, starts[3], dims[3];
1333: ISLocalToGlobalMapping ltog;
1335: PetscFunctionBeginUser;
1336: PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1337: PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D");
1338: PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm));
1339: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1340: PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A));
1341: PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE));
1342: PetscCall(MatSetType(A, da->mattype));
1343: PetscCall(MatSetFromOptions(A));
1344: PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL));
1345: PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL));
1346: PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL));
1347: PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL));
1348: PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL));
1349: PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL));
1350: PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
1351: PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
1352: PetscCall(MatSetStencil(A, dim, dims, starts, dof));
1353: *J = A;
1354: PetscFunctionReturn(PETSC_SUCCESS);
1355: }
1357: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[])
1358: {
1359: const PetscInt dof = 2;
1360: Units units = thi->units;
1361: MPI_Comm comm;
1362: PetscViewer viewer;
1363: PetscMPIInt rank, size, tag, nn, nmax;
1364: PetscInt mx, my, mz, range[6];
1365: const PetscScalar *x;
1367: PetscFunctionBeginUser;
1368: PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1369: PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1370: PetscCallMPI(MPI_Comm_size(comm, &size));
1371: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1372: PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer));
1373: PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1374: PetscCall(PetscViewerASCIIPrintf(viewer, " <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1376: PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1377: PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1378: PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1379: tag = ((PetscObject)viewer)->tag;
1380: PetscCall(VecGetArrayRead(X, &x));
1381: if (rank == 0) {
1382: PetscScalar *array;
1383: PetscCall(PetscMalloc1(nmax, &array));
1384: for (PetscMPIInt r = 0; r < size; r++) {
1385: PetscInt i, j, k, xs, xm, ys, ym, zs, zm;
1386: const PetscScalar *ptr;
1387: MPI_Status status;
1388: if (r) PetscCallMPI(MPIU_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1389: zs = range[0];
1390: ys = range[1];
1391: xs = range[2];
1392: zm = range[3];
1393: ym = range[4];
1394: xm = range[5];
1395: PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1396: if (r) {
1397: PetscCallMPI(MPIU_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1398: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1399: PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1400: ptr = array;
1401: } else ptr = x;
1402: PetscCall(PetscViewerASCIIPrintf(viewer, " <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1404: PetscCall(PetscViewerASCIIPrintf(viewer, " <Points>\n"));
1405: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1406: for (i = xs; i < xs + xm; i++) {
1407: for (j = ys; j < ys + ym; j++) {
1408: for (k = zs; k < zs + zm; k++) {
1409: PrmNode p;
1410: PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz;
1411: thi->initialize(thi, xx, yy, &p);
1412: zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1);
1413: PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1414: }
1415: }
1416: }
1417: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1418: PetscCall(PetscViewerASCIIPrintf(viewer, " </Points>\n"));
1420: PetscCall(PetscViewerASCIIPrintf(viewer, " <PointData>\n"));
1421: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1422: for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0));
1423: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1425: PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1426: for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%d\n", r));
1427: PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n"));
1428: PetscCall(PetscViewerASCIIPrintf(viewer, " </PointData>\n"));
1430: PetscCall(PetscViewerASCIIPrintf(viewer, " </Piece>\n"));
1431: }
1432: PetscCall(PetscFree(array));
1433: } else {
1434: PetscCallMPI(MPIU_Send(range, 6, MPIU_INT, 0, tag, comm));
1435: PetscCallMPI(MPIU_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm));
1436: }
1437: PetscCall(VecRestoreArrayRead(X, &x));
1438: PetscCall(PetscViewerASCIIPrintf(viewer, " </StructuredGrid>\n"));
1439: PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n"));
1440: PetscCall(PetscViewerDestroy(&viewer));
1441: PetscFunctionReturn(PETSC_SUCCESS);
1442: }
1444: int main(int argc, char *argv[])
1445: {
1446: MPI_Comm comm;
1447: THI thi;
1448: DM da;
1449: SNES snes;
1451: PetscFunctionBeginUser;
1452: PetscCall(PetscInitialize(&argc, &argv, 0, help));
1453: comm = PETSC_COMM_WORLD;
1455: PetscCall(THICreate(comm, &thi));
1456: {
1457: PetscInt M = 3, N = 3, P = 2;
1458: PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1459: {
1460: PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1461: N = M;
1462: PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1463: if (thi->coarse2d) {
1464: PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL));
1465: } else {
1466: PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1467: }
1468: }
1469: PetscOptionsEnd();
1470: if (thi->coarse2d) {
1471: PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da));
1472: PetscCall(DMSetFromOptions(da));
1473: PetscCall(DMSetUp(da));
1474: da->ops->refinehierarchy = DMRefineHierarchy_THI;
1475: da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1477: PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi));
1478: } else {
1479: PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1480: PetscCall(DMSetFromOptions(da));
1481: PetscCall(DMSetUp(da));
1482: }
1483: PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1484: PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1485: }
1486: PetscCall(THISetUpDM(thi, da));
1487: if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1489: { /* Set the fine level matrix type if -da_refine */
1490: PetscInt rlevel, clevel;
1491: PetscCall(DMGetRefineLevel(da, &rlevel));
1492: PetscCall(DMGetCoarsenLevel(da, &clevel));
1493: if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype));
1494: }
1496: PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunctionFn *)THIFunctionLocal, thi));
1497: if (thi->tridiagonal) {
1498: PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Tridiagonal, thi));
1499: } else {
1500: PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
1501: }
1502: PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi));
1503: PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi));
1505: PetscCall(DMSetApplicationContext(da, thi));
1507: PetscCall(SNESCreate(comm, &snes));
1508: PetscCall(SNESSetDM(snes, da));
1509: PetscCall(DMDestroy(&da));
1510: PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL));
1511: PetscCall(SNESSetFromOptions(snes));
1513: PetscCall(SNESSolve(snes, NULL, NULL));
1515: PetscCall(THISolveStatistics(thi, snes, 0, "Full"));
1517: {
1518: PetscBool flg;
1519: char filename[PETSC_MAX_PATH_LEN] = "";
1520: PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1521: if (flg) {
1522: Vec X;
1523: DM dm;
1524: PetscCall(SNESGetSolution(snes, &X));
1525: PetscCall(SNESGetDM(snes, &dm));
1526: PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename));
1527: }
1528: }
1530: PetscCall(DMDestroy(&da));
1531: PetscCall(SNESDestroy(&snes));
1532: PetscCall(THIDestroy(&thi));
1533: PetscCall(PetscFinalize());
1534: return 0;
1535: }
1537: /*TEST
1539: build:
1540: requires: !single
1542: test:
1543: args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc
1545: test:
1546: suffix: 2
1547: nsize: 2
1548: args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol 0
1550: test:
1551: suffix: 3
1552: nsize: 3
1553: args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current
1555: test:
1556: suffix: 4
1557: nsize: 6
1558: args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol 0
1560: test:
1561: suffix: 5
1562: nsize: 6
1563: args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}
1565: TEST*/