Actual source code: ex56.c
1: /* Portions of this code are under:
2: Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
3: */
/* Program description; main() hands this string to PetscInitialize(). */
static char help[] = "3D tensor hexahedra & 3D Laplacian displacement finite element formulation\n"
                     "of linear elasticity. E=1.0, nu=1/3.\n"
                     "Unit cube domain with Dirichlet boundary\n\n";
8: #include <petscdmplex.h>
9: #include <petscsnes.h>
10: #include <petscds.h>
11: #include <petscdmforest.h>
13: static PetscReal s_soft_alpha = 0.01;
14: static PetscReal s_mu = 0.4;
15: static PetscReal s_lambda = 0.4;
17: static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
18: {
19: f0[0] = 1; /* x direction pull */
20: f0[1] = -x[2]; /* add a twist around x-axis */
21: f0[2] = x[1];
22: }
24: static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
25: {
26: const PetscInt Ncomp = dim;
27: PetscInt d;
28: for (PetscInt comp = 0; comp < Ncomp; ++comp) {
29: for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0;
30: }
31: }
33: /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
34: static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
35: {
36: PetscReal trace, mu = s_mu, lambda = s_lambda, rad;
37: PetscInt i, j;
38: for (i = 0, rad = 0.; i < dim; i++) {
39: PetscReal t = x[i];
40: rad += t * t;
41: }
42: rad = PetscSqrtReal(rad);
43: if (rad > 0.25) {
44: mu *= s_soft_alpha;
45: lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
46: }
47: for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
48: for (i = 0; i < dim; ++i) {
49: for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
50: f1[i * dim + i] += lambda * trace;
51: }
52: }
54: /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
55: static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
56: {
57: PetscReal trace, mu = s_mu, lambda = s_lambda;
58: PetscInt i, j;
59: for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
60: for (i = 0; i < dim; ++i) {
61: for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
62: f1[i * dim + i] += lambda * trace;
63: }
64: }
66: static void f1_u_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
67: {
68: PetscInt d;
69: for (d = 0; d < dim; ++d) f1[d] = u_x[d];
70: }
72: /* 3D elasticity */
/* Flat index of entry (ii, jj, kk, ll) in a 3x3x3x3 tensor stored with ll fastest; arguments are parenthesized so expressions expand safely. */
#define IDX(ii, jj, kk, ll) (27 * (ii) + 9 * (jj) + 3 * (kk) + (ll))
75: void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda)
76: {
77: if (1) {
78: g3[0] += lambda;
79: g3[0] += mu;
80: g3[0] += mu;
81: g3[4] += lambda;
82: g3[8] += lambda;
83: g3[10] += mu;
84: g3[12] += mu;
85: g3[20] += mu;
86: g3[24] += mu;
87: g3[28] += mu;
88: g3[30] += mu;
89: g3[36] += lambda;
90: g3[40] += lambda;
91: g3[40] += mu;
92: g3[40] += mu;
93: g3[44] += lambda;
94: g3[50] += mu;
95: g3[52] += mu;
96: g3[56] += mu;
97: g3[60] += mu;
98: g3[68] += mu;
99: g3[70] += mu;
100: g3[72] += lambda;
101: g3[76] += lambda;
102: g3[80] += lambda;
103: g3[80] += mu;
104: g3[80] += mu;
105: } else {
106: int i, j, k, l;
107: static int cc = -1;
108: cc++;
109: for (i = 0; i < 3; ++i) {
110: for (j = 0; j < 3; ++j) {
111: for (k = 0; k < 3; ++k) {
112: for (l = 0; l < 3; ++l) {
113: if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda;
114: if (i == k && j == l) g3[IDX(i, j, k, l)] += mu;
115: if (i == l && j == k) g3[IDX(i, j, k, l)] += mu;
116: if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l));
117: if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
118: if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
119: }
120: }
121: }
122: }
123: }
124: }
126: static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
127: {
128: PetscReal mu = s_mu, lambda = s_lambda, rad = 0.0;
130: for (PetscInt i = 0; i < dim; i++) {
131: PetscReal t = x[i];
132: rad += t * t;
133: }
134: rad = PetscSqrtReal(rad);
135: if (rad > 0.25) {
136: mu *= s_soft_alpha;
137: lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
138: }
139: g3_uu_3d_private(g3, mu, lambda);
140: }
142: static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
143: {
144: g3_uu_3d_private(g3, s_mu, s_lambda);
145: }
147: static void g3_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
148: {
149: PetscInt d;
150: for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
151: }
153: static void g3_lap_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
154: {
155: PetscReal lambda = 1, rad = 0.0;
157: for (PetscInt i = 0; i < dim; i++) {
158: PetscReal t = x[i];
159: rad += t * t;
160: }
161: rad = PetscSqrtReal(rad);
162: if (rad > 0.25) lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
163: for (int d = 0; d < dim; ++d) g3[d * dim + d] = lambda;
164: }
166: static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
167: {
168: const PetscInt Ncomp = dim;
170: for (PetscInt comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0;
171: }
173: /* PI_i (x_i^4 - x_i^2) */
174: static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
175: {
176: for (int comp = 0; comp < Nf; ++comp) {
177: f0[comp] = 1e5;
178: for (int i = 0; i < dim; ++i) f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */
179: }
180: }
182: PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, PetscCtx ctx)
183: {
184: const PetscInt Ncomp = dim;
186: for (PetscInt comp = 0; comp < Ncomp; ++comp) u[comp] = 0;
187: return PETSC_SUCCESS;
188: }
190: int main(int argc, char **args)
191: {
192: Mat Amat;
193: SNES snes;
194: KSP ksp;
195: MPI_Comm comm;
196: PetscMPIInt rank;
197: PetscLogStage stage[17];
198: PetscBool test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE;
199: Vec xx, bb;
200: PetscInt iter, i, N, dim = 3, max_conv_its, sizes[7], run_type = 1, Ncomp = dim;
201: DM dm;
202: PetscBool flg;
203: PetscReal Lx, mdisp[10], err[10];
205: PetscFunctionBeginUser;
206: PetscCall(PetscInitialize(&argc, &args, NULL, help));
207: comm = PETSC_COMM_WORLD;
208: PetscCallMPI(MPI_Comm_rank(comm, &rank));
209: /* options */
210: PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", "");
211: {
212: Lx = 1.; /* or ne for rod */
213: max_conv_its = 3;
214: PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL));
215: PetscCheck(max_conv_its > 0 && max_conv_its < 8, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its);
216: PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL));
217: PetscCall(PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL));
218: PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL));
219: PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL));
220: PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL));
221: PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: Elasticty convergence test on cube, 2: Laplacian, 3: soft core Laplacian", "", run_type, &run_type, NULL));
222: }
223: PetscOptionsEnd();
224: PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16]));
225: for (iter = 0; iter < max_conv_its; iter++) {
226: char str[] = "Solve 0";
227: str[6] += iter;
228: PetscCall(PetscLogStageRegister(str, &stage[iter]));
229: }
230: /* create DM, Plex calls DMSetup */
231: PetscCall(PetscLogStagePush(stage[16]));
232: PetscCall(DMCreate(comm, &dm));
233: PetscCall(DMSetType(dm, DMPLEX));
234: PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
235: PetscCall(DMSetFromOptions(dm));
236: PetscCall(DMPlexDistributeSetDefault(dm, PETSC_FALSE));
237: PetscCall(DMGetDimension(dm, &dim));
238: {
239: DMLabel label;
240: IS is;
241: PetscCall(DMCreateLabel(dm, "boundary"));
242: PetscCall(DMGetLabel(dm, "boundary", &label));
243: PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label));
244: if (run_type == 0) {
245: PetscCall(DMGetStratumIS(dm, "boundary", 1, &is));
246: PetscCall(DMCreateLabel(dm, "Faces"));
247: if (is) {
248: PetscInt d, f, Nf;
249: const PetscInt *faces;
250: PetscInt csize;
251: PetscSection cs;
252: Vec coordinates;
253: DM cdm;
254: PetscCall(ISGetLocalSize(is, &Nf));
255: PetscCall(ISGetIndices(is, &faces));
256: PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
257: PetscCall(DMGetCoordinateDM(dm, &cdm));
258: PetscCall(DMGetLocalSection(cdm, &cs));
259: /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */
260: for (f = 0; f < Nf; ++f) {
261: PetscReal faceCoord;
262: PetscScalar *coords = NULL;
263: PetscInt Nv;
264: PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
265: Nv = csize / dim; /* Calculate mean coordinate vector */
266: for (d = 0; d < dim; ++d) {
267: faceCoord = 0.0;
268: for (PetscInt v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]);
269: faceCoord /= Nv;
270: for (PetscInt b = 0; b < 2; ++b) {
271: if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */
272: PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1));
273: }
274: }
275: }
276: PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
277: }
278: PetscCall(ISRestoreIndices(is, &faces));
279: }
280: PetscCall(ISDestroy(&is));
281: PetscCall(DMGetLabel(dm, "Faces", &label));
282: PetscCall(DMPlexLabelComplete(dm, label));
283: }
284: }
285: PetscCall(PetscLogStagePop());
286: for (iter = 0; iter < max_conv_its; iter++) {
287: PetscCall(PetscLogStagePush(stage[16]));
288: /* snes */
289: PetscCall(SNESCreate(comm, &snes));
290: PetscCall(SNESSetDM(snes, dm));
291: PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
292: /* fem */
293: {
294: const PetscInt components[] = {0, 1, 2};
295: const PetscInt Nfid = 1, Npid = 1;
296: PetscInt fid[] = {1}; /* The fixed faces (x=0) */
297: const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */
298: PetscFE fe;
299: PetscDS prob;
300: DMLabel label;
302: if (run_type == 2 || run_type == 3) Ncomp = 1;
303: else Ncomp = dim;
304: PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, Ncomp, PETSC_FALSE, NULL, PETSC_DECIDE, &fe));
305: PetscCall(PetscObjectSetName((PetscObject)fe, "deformation"));
306: /* FEM prob */
307: PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
308: PetscCall(DMCreateDS(dm));
309: PetscCall(DMGetDS(dm, &prob));
310: /* setup problem */
311: if (run_type == 1) { // elast
312: PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d));
313: PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d));
314: } else if (run_type == 0) { //twisted not maintained
315: PetscWeakForm wf;
316: PetscInt bd;
317: PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha));
318: PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha));
319: PetscCall(DMGetLabel(dm, "Faces", &label));
320: PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd));
321: PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
322: for (PetscInt i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u));
323: } else if (run_type == 2) { // Laplacian
324: PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap));
325: PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
326: } else if (run_type == 3) { // soft core Laplacian
327: PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap_alpha));
328: PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
329: }
330: /* bcs */
331: if (run_type != 0) {
332: PetscInt id = 1;
333: PetscCall(DMGetLabel(dm, "boundary", &label));
334: PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (PetscVoidFn *)zero, NULL, NULL, NULL));
335: } else {
336: PetscCall(DMGetLabel(dm, "Faces", &label));
337: PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (PetscVoidFn *)zero, NULL, NULL, NULL));
338: }
339: PetscCall(PetscFEDestroy(&fe));
340: }
341: /* vecs & mat */
342: PetscCall(DMCreateGlobalVector(dm, &xx));
343: PetscCall(VecDuplicate(xx, &bb));
344: PetscCall(PetscObjectSetName((PetscObject)bb, "b"));
345: PetscCall(PetscObjectSetName((PetscObject)xx, "u"));
346: PetscCall(DMCreateMatrix(dm, &Amat));
347: PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE)); /* Some matrix kernels can take advantage of symmetry if we set this. */
348: PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */
349: PetscCall(MatSetBlockSize(Amat, Ncomp));
350: PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
351: PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
352: PetscCall(VecGetSize(bb, &N));
353: sizes[iter] = N;
354: PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim));
355: if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1 && Ncomp > 1) {
356: /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */
357: DM subdm;
358: MatNullSpace nearNullSpace;
359: PetscInt fields = 0;
360: PetscObject deformation;
361: PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm));
362: PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace));
363: PetscCall(DMGetField(dm, 0, NULL, &deformation));
364: PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace));
365: PetscCall(DMDestroy(&subdm));
366: if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace));
367: PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */
368: }
369: PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, NULL));
370: PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL));
371: PetscCall(SNESSetFromOptions(snes));
372: PetscCall(DMSetUp(dm));
373: PetscCall(PetscLogStagePop());
374: PetscCall(PetscLogStagePush(stage[16]));
375: /* ksp */
376: PetscCall(SNESGetKSP(snes, &ksp));
377: PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE));
378: if (!use_nearnullspace) {
379: PC pc;
380: PetscCall(KSPGetPC(ksp, &pc));
381: PetscCall(PCGAMGASMSetHEM(pc, 3)); // code coverage
382: }
383: /* test BCs */
384: PetscCall(VecZeroEntries(xx));
385: if (test_nonzero_cols) {
386: if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES));
387: PetscCall(VecAssemblyBegin(xx));
388: PetscCall(VecAssemblyEnd(xx));
389: }
390: PetscCall(VecZeroEntries(bb));
391: PetscCall(VecGetSize(bb, &i));
392: sizes[iter] = i;
393: PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim));
394: PetscCall(PetscLogStagePop());
395: /* solve */
396: PetscCall(SNESComputeJacobian(snes, xx, Amat, Amat));
397: PetscCall(MatViewFromOptions(Amat, NULL, "-my_mat_view"));
398: PetscCall(PetscLogStagePush(stage[iter]));
399: PetscCall(SNESSolve(snes, bb, xx));
400: PetscCall(PetscLogStagePop());
401: PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter]));
402: {
403: PetscViewer viewer = NULL;
404: PetscViewerFormat fmt;
405: PetscCall(PetscOptionsCreateViewer(comm, NULL, "", "-vec_view", &viewer, &fmt, &flg));
406: if (flg) {
407: PetscCall(PetscViewerPushFormat(viewer, fmt));
408: PetscCall(VecView(xx, viewer));
409: PetscCall(VecView(bb, viewer));
410: PetscCall(PetscViewerPopFormat(viewer));
411: }
412: PetscCall(PetscViewerDestroy(&viewer));
413: }
414: /* Free work space */
415: PetscCall(SNESDestroy(&snes));
416: PetscCall(VecDestroy(&xx));
417: PetscCall(VecDestroy(&bb));
418: PetscCall(MatDestroy(&Amat));
419: if (iter + 1 < max_conv_its) {
420: DM newdm;
421: PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
422: PetscCall(DMRefine(dm, comm, &newdm));
423: if (rank == -1) {
424: PetscDS prob;
425: PetscCall(DMGetDS(dm, &prob));
426: PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
427: PetscCall(DMGetDS(newdm, &prob));
428: PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
429: }
430: PetscCall(DMDestroy(&dm));
431: dm = newdm;
432: PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
433: PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
434: PetscCall(DMSetFromOptions(dm));
435: }
436: }
437: PetscCall(DMDestroy(&dm));
438: if (run_type == 1) err[0] = 5.97537599375e+01 - mdisp[0]; /* error with what I think is the exact solution */
439: else if (run_type == 0) err[0] = 0;
440: else if (run_type == 2) err[0] = 3.527795e+01 - mdisp[0];
441: else err[0] = 0;
442: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %d) N=%12" PetscInt_FMT ", max displ=%9.7e, error=%4.3e\n", rank, 0, sizes[0], (double)mdisp[0], (double)err[0]));
443: for (iter = 1; iter < max_conv_its; iter++) {
444: if (run_type == 1) err[iter] = 5.97537599375e+01 - mdisp[iter];
445: else if (run_type == 0) err[iter] = 0;
446: else if (run_type == 2) err[iter] = 3.527795e+01 - mdisp[iter];
447: else err[iter] = 0;
448: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(PetscAbs(err[iter - 1] / err[iter])) / PetscLogReal(2.))));
449: }
451: PetscCall(PetscFinalize());
452: return 0;
453: }
455: /*TEST
457: testset:
458: nsize: 4
459: requires: !single
460: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -petscspace_degree 2 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.001 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -my_dm_view -snes_lag_jacobian -2 -snes_type ksponly -pc_gamg_mis_k_minimum_degree_ordering true -pc_gamg_low_memory_threshold_filter
461: timeoutfactor: 2
462: test:
463: suffix: 0
464: args: -run_type 1 -max_conv_its 3 -pc_gamg_mat_coarsen_type hem -pc_gamg_mat_coarsen_max_it 5 -pc_gamg_asm_hem_aggs 4 -ksp_rtol 1.e-6
465: filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 7/Linear solve converged due to CONVERGED_RTOL iterations 8/g"
466: test:
467: suffix: 1
468: filter: grep -v HERMITIAN
469: args: -run_type 2 -max_conv_its 2 -use_mat_nearnullspace false -snes_view
471: test:
472: nsize: 1
473: requires: !single
474: suffix: 2
475: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 1 -ksp_type cg -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_aggressive_coarsening 1 -ksp_converged_reason -use_mat_nearnullspace true -my_dm_view -snes_type ksponly
476: timeoutfactor: 2
478: # HYPRE PtAP broken with complex numbers
479: test:
480: suffix: hypre
481: requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
482: nsize: 4
483: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple
485: test:
486: suffix: ml
487: requires: ml !single
488: nsize: 4
489: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace
491: test:
492: suffix: hpddm
493: requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
494: nsize: 4
495: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd
497: test:
498: suffix: repart
499: nsize: 4
500: requires: parmetis !single
501: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -pc_gamg_reuse_interpolation true -petscpartitioner_type simple
503: test:
504: suffix: bddc
505: nsize: 4
506: requires: !single
507: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type {{sbaij baij aij}} -pc_type bddc
509: testset:
510: nsize: 4
511: requires: !single
512: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output}
513: test:
514: suffix: bddc_approx_gamg
515: args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
516: # HYPRE PtAP broken with complex numbers
517: test:
518: requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
519: suffix: bddc_approx_hypre
520: args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop
521: test:
522: requires: ml
523: suffix: bddc_approx_ml
524: args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
526: test:
527: suffix: fetidp
528: nsize: 4
529: requires: !single
530: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type {{sbaij baij aij}}
532: test:
533: suffix: bddc_elast
534: nsize: 4
535: requires: !single
536: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace
538: test:
539: suffix: fetidp_elast
540: nsize: 4
541: requires: !single
542: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace
544: test:
545: suffix: gdsw
546: nsize: 4
547: requires: !single
548: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -attach_mat_nearnullspace \
549: -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc
551: testset:
552: nsize: 4
553: requires: !single
554: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_monitor_short -dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 40
555: output_file: output/ex56_cuda.out
557: test:
558: suffix: cuda
559: requires: cuda
560: args: -dm_mat_type aijcusparse -dm_vec_type cuda
562: test:
563: suffix: hip
564: requires: hip
565: args: -dm_mat_type aijhipsparse -dm_vec_type hip
567: test:
568: suffix: viennacl
569: requires: viennacl
570: args: -dm_mat_type aijviennacl -dm_vec_type viennacl
572: test:
573: suffix: kokkos
574: requires: kokkos_kernels
575: args: -dm_mat_type aijkokkos -dm_vec_type kokkos
576: # Don't run AIJMKL cases with complex scalars because of convergence issues.
577: # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation.
578: test:
579: suffix: seqaijmkl
580: nsize: 1
581: requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
582: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
583: timeoutfactor: 2
585: test:
586: suffix: mpiaijmkl
587: nsize: 4
588: requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
589: args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
590: timeoutfactor: 2
592: TEST*/