Actual source code: ex56.c

/* Portions of this code are under:
   Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/
static char help[] = "3D tensor hexahedra & 3D Laplacian displacement finite element formulation\n\
of linear elasticity.  E=1.0, nu=1/4 (giving mu = lambda = 0.4).\n\
Unit cube domain with Dirichlet boundary\n\n";
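/* A representative invocation, assembled from options exercised by the tests at the
   end of this file (see the TEST block for the complete, verified option sets):

     mpiexec -n 4 ./ex56 -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 \
       -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_type simple \
       -petscpartitioner_simple_process_grid 2,2,1 -run_type 1 -max_conv_its 2 \
       -petscspace_degree 2 -snes_type ksponly -ksp_type cg -pc_type gamg \
       -use_mat_nearnullspace true -ksp_converged_reason
*/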

#include <petscdmplex.h>
#include <petscsnes.h>
#include <petscds.h>
#include <petscdmforest.h>

static PetscReal s_soft_alpha = 0.01; /* scaling of the material coefficients in the soft region */
static PetscReal s_mu         = 0.4;  /* Lame shear modulus */
static PetscReal s_lambda     = 0.4;  /* Lame first parameter */
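/* For an isotropic material, lambda = E*nu/((1 + nu)*(1 - 2*nu)) and mu = E/(2*(1 + nu));
   mu = lambda = 0.4 corresponds to E = 1.0 and nu = 1/4. */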

static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  f0[0] = 1;     /* x direction pull */
  f0[1] = -x[2]; /* add a twist around x-axis */
  f0[2] = x[1];
}
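/* Together these give the traction (1, -x_2, x_1) on the loaded face: a uniform pull in x
   plus a rotational field about the x-axis, i.e. the twisting load of run_type 0. */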

static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  /* the boundary integral has no gradient (f1) contribution */
  const PetscInt Ncomp = dim;
  PetscInt       comp, d;
  for (comp = 0; comp < Ncomp; ++comp) {
    for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0;
  }
}

/* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscReal trace, mu = s_mu, lambda = s_lambda, rad;
  PetscInt  i, j;
  for (i = 0, rad = 0.; i < dim; i++) {
    PetscReal t = x[i];
    rad += t * t;
  }
  rad = PetscSqrtReal(rad);
  if (rad > 0.25) { /* soften the material outside a ball of radius 0.25 about the origin */
    mu *= s_soft_alpha;
    lambda *= s_soft_alpha; /* we could instead keep the bulk modulus fixed, like a rubbery material */
  }
  for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
  for (i = 0; i < dim; ++i) {
    for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
    f1[i * dim + i] += lambda * trace;
  }
}
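/* In f1_u_3d_alpha above and f1_u_3d below, f1 is the Cauchy stress of linear
   isotropic elasticity,
     sigma_ij = mu*(du_i/dx_j + du_j/dx_i) + lambda*(div u)*delta_ij,
   which is exactly what the two loops assemble. */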

/* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscReal trace, mu = s_mu, lambda = s_lambda;
  PetscInt  i, j;
  for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
  for (i = 0; i < dim; ++i) {
    for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
    f1[i * dim + i] += lambda * trace;
  }
}

static void f1_u_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f1[d] = u_x[d];
}

/* 3D elasticity: IDX flattens the rank-4 index (i,j,k,l) of the 3x3x3x3 tensor g3 */
#define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll)
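/* The unrolled updates in the branch below implement the isotropic elasticity tensor
     C_ijkl = lambda*delta_ij*delta_kl + mu*(delta_ik*delta_jl + delta_il*delta_jk)
   at the flat offsets IDX(i,j,k,l); the dead else-branch is the loop that generated
   (and can regenerate) the unrolled list. In PETSc's PetscDSSetJacobian convention,
   g3 is contracted with grad(v) and grad(u) in the weak form. */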

void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda)
{
  if (1) {
    g3[0] += lambda;
    g3[0] += mu;
    g3[0] += mu;
    g3[4] += lambda;
    g3[8] += lambda;
    g3[10] += mu;
    g3[12] += mu;
    g3[20] += mu;
    g3[24] += mu;
    g3[28] += mu;
    g3[30] += mu;
    g3[36] += lambda;
    g3[40] += lambda;
    g3[40] += mu;
    g3[40] += mu;
    g3[44] += lambda;
    g3[50] += mu;
    g3[52] += mu;
    g3[56] += mu;
    g3[60] += mu;
    g3[68] += mu;
    g3[70] += mu;
    g3[72] += lambda;
    g3[76] += lambda;
    g3[80] += lambda;
    g3[80] += mu;
    g3[80] += mu;
  } else {
    int        i, j, k, l;
    static int cc = -1;
    cc++;
    for (i = 0; i < 3; ++i) {
      for (j = 0; j < 3; ++j) {
        for (k = 0; k < 3; ++k) {
          for (l = 0; l < 3; ++l) {
            if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda;
            if (i == k && j == l) g3[IDX(i, j, k, l)] += mu;
            if (i == l && j == k) g3[IDX(i, j, k, l)] += mu;
            if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l));
            if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
            if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
          }
        }
      }
    }
  }
}

static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscReal mu = s_mu, lambda = s_lambda, rad;
  PetscInt  i;
  for (i = 0, rad = 0.; i < dim; i++) {
    PetscReal t = x[i];
    rad += t * t;
  }
  rad = PetscSqrtReal(rad);
  if (rad > 0.25) {
    mu *= s_soft_alpha;
    lambda *= s_soft_alpha; /* we could instead keep the bulk modulus fixed, like a rubbery material */
  }
  g3_uu_3d_private(g3, mu, lambda);
}

static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  g3_uu_3d_private(g3, s_mu, s_lambda);
}

static void g3_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
}
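/* g3_lap above is the dim x dim identity: the pointwise Jacobian of the Laplacian,
   whose weak form integrand is simply grad(v) . grad(u). */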

static void g3_lap_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscReal lambda = 1, rad;
  PetscInt  i;
  for (i = 0, rad = 0.; i < dim; i++) {
    PetscReal t = x[i];
    rad += t * t;
  }
  rad = PetscSqrtReal(rad);
  if (rad > 0.25) { lambda *= s_soft_alpha; /* soften the coefficient outside a ball of radius 0.25 about the origin */ }
  for (int d = 0; d < dim; ++d) g3[d * dim + d] = lambda;
}

static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  const PetscInt Ncomp = dim;
  PetscInt       comp;

  for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0;
}

/* forcing term: f(x) = 1e5 * PI_i (x_i^4 - x_i^2) */
static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  for (int comp = 0; comp < Nf; ++comp) {
    f0[comp] = 1e5;
    for (int i = 0; i < dim; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes a [0,1]^D domain */ }
  }
}
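/* Since x^4 - x^2 vanishes at x = 0 and x = 1, this forcing is zero on the boundary of
   the unit cube, consistent with the homogeneous Dirichlet condition imposed via zero(). */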

PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx)
{
  const PetscInt Ncomp = dim;
  PetscInt       comp;

  for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0;
  return PETSC_SUCCESS;
}

int main(int argc, char **args)
{
  Mat           Amat;
  SNES          snes;
  KSP           ksp;
  MPI_Comm      comm;
  PetscMPIInt   rank;
  PetscLogStage stage[17];
  PetscBool     test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE;
  Vec           xx, bb;
  PetscInt      iter, i, N, dim = 3, max_conv_its, sizes[7], run_type = 1, Ncomp = dim;
  DM            dm;
  PetscBool     flg;
  PetscReal     Lx, mdisp[10], err[10];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  comm = PETSC_COMM_WORLD;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* options */
  PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", "");
  {
    Lx           = 1.; /* or ne for rod */
    max_conv_its = 3;
    PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL));
    PetscCheck(max_conv_its > 0 && max_conv_its < 8, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its);
    PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL));
    PetscCall(PetscOptionsReal("-alpha", "scaling of the material coefficients outside the ball of radius 0.25", "", s_soft_alpha, &s_soft_alpha, NULL));
    PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL));
    PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL));
    PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL));
    PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantilever, 1: Elasticity convergence test on cube, 2: Laplacian, 3: soft core Laplacian", "", run_type, &run_type, NULL));
  }
  PetscOptionsEnd();
  PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16]));
  for (iter = 0; iter < max_conv_its; iter++) {
    char str[] = "Solve 0";
    str[6] += iter; /* name the stages "Solve 0", "Solve 1", ... */
    PetscCall(PetscLogStageRegister(str, &stage[iter]));
  }
  /* create DM, Plex calls DMSetup */
  PetscCall(PetscLogStagePush(stage[16]));
  PetscCall(DMCreate(comm, &dm));
  PetscCall(DMSetType(dm, DMPLEX));
  PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
  PetscCall(DMSetFromOptions(dm));
  PetscCall(DMPlexDistributeSetDefault(dm, PETSC_FALSE));
  PetscCall(DMGetDimension(dm, &dim));
  {
    DMLabel label;
    IS      is;
    PetscCall(DMCreateLabel(dm, "boundary"));
    PetscCall(DMGetLabel(dm, "boundary", &label));
    PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label));
    if (run_type == 0) {
      PetscCall(DMGetStratumIS(dm, "boundary", 1, &is));
      PetscCall(DMCreateLabel(dm, "Faces"));
      if (is) {
        PetscInt        d, f, Nf;
        const PetscInt *faces;
        PetscInt        csize;
        PetscSection    cs;
        Vec             coordinates;
        DM              cdm;
        PetscCall(ISGetLocalSize(is, &Nf));
        PetscCall(ISGetIndices(is, &faces));
        PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
        PetscCall(DMGetCoordinateDM(dm, &cdm));
        PetscCall(DMGetLocalSection(cdm, &cs));
        /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */
        for (f = 0; f < Nf; ++f) {
          PetscReal    faceCoord;
          PetscInt     b, v;
          PetscScalar *coords = NULL;
          PetscInt     Nv;
          PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
          Nv = csize / dim; /* Calculate mean coordinate vector */
          for (d = 0; d < dim; ++d) {
            faceCoord = 0.0;
            for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]);
            faceCoord /= Nv;
            for (b = 0; b < 2; ++b) {
              if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* the domain has not been rescaled yet, so it is still [0,1]^3 */
                /* label values 1..6 correspond to the faces x=0, x=1, y=0, y=1, z=0, z=1 */
                PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1));
              }
            }
          }
          PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
        }
        PetscCall(ISRestoreIndices(is, &faces));
      }
      PetscCall(ISDestroy(&is));
      PetscCall(DMGetLabel(dm, "Faces", &label));
      PetscCall(DMPlexLabelComplete(dm, label));
    }
  }
  PetscCall(PetscLogStagePop());
  for (iter = 0; iter < max_conv_its; iter++) {
    PetscCall(PetscLogStagePush(stage[16]));
    /* snes */
    PetscCall(SNESCreate(comm, &snes));
    PetscCall(SNESSetDM(snes, dm));
    PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
    /* fem */
    {
      const PetscInt components[] = {0, 1, 2};
      const PetscInt Nfid = 1, Npid = 1;
      PetscInt       fid[] = {1}; /* The fixed faces (x=0) */
      const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */
      PetscFE        fe;
      PetscDS        prob;
      DMLabel        label;

      if (run_type == 2 || run_type == 3) Ncomp = 1;
      else Ncomp = dim;
      PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, Ncomp, PETSC_FALSE, NULL, PETSC_DECIDE, &fe));
      PetscCall(PetscObjectSetName((PetscObject)fe, "deformation"));
      /* FEM prob */
      PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
      PetscCall(DMCreateDS(dm));
      PetscCall(DMGetDS(dm, &prob));
      /* setup problem */
      if (run_type == 1) { // elasticity
        PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d));
        PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d));
      } else if (run_type == 0) { // twisting load; this path is not maintained
        PetscWeakForm wf;
        PetscInt      bd, i;
        PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha));
        PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha));
        PetscCall(DMGetLabel(dm, "Faces", &label));
        PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd));
        PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
        for (i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u));
      } else if (run_type == 2) { // Laplacian
        PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap));
        PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
      } else if (run_type == 3) { // soft core Laplacian
        PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap_alpha));
        PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
      }
      /* bcs */
      if (run_type != 0) {
        PetscInt id = 1;
        PetscCall(DMGetLabel(dm, "boundary", &label));
        PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL));
      } else {
        PetscCall(DMGetLabel(dm, "Faces", &label));
        PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL));
      }
      PetscCall(PetscFEDestroy(&fe));
    }
    /* vecs & mat */
    PetscCall(DMCreateGlobalVector(dm, &xx));
    PetscCall(VecDuplicate(xx, &bb));
    PetscCall(PetscObjectSetName((PetscObject)bb, "b"));
    PetscCall(PetscObjectSetName((PetscObject)xx, "u"));
    PetscCall(DMCreateMatrix(dm, &Amat));
    PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE));        /* Some matrix kernels can take advantage of symmetry if we set this. */
    PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */
    PetscCall(MatSetBlockSize(Amat, Ncomp)); /* block size = number of field components, so solvers see the vector structure */
    PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
    PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
    PetscCall(VecGetSize(bb, &N));
    sizes[iter] = N;
    PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim));
    if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1 && Ncomp > 1) {
      /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */
      DM           subdm;
      MatNullSpace nearNullSpace;
      PetscInt     fields = 0;
      PetscObject  deformation;
      PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm));
      PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace));
      PetscCall(DMGetField(dm, 0, NULL, &deformation));
      PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace));
      PetscCall(DMDestroy(&subdm));
      if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace));
      PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */
    }
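    /* In 3D the rigid-body modes (three translations and three rotations) span the
       near-kernel of the elasticity operator; AMG methods such as GAMG use them to
       build interpolation that preserves these low-energy modes. */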
    PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, NULL));
    PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL));
    PetscCall(SNESSetFromOptions(snes));
    PetscCall(DMSetUp(dm));
    PetscCall(PetscLogStagePop());
    PetscCall(PetscLogStagePush(stage[16]));
    /* ksp */
    PetscCall(SNESGetKSP(snes, &ksp));
    PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE));
    if (!use_nearnullspace) {
      PC pc;
      PetscCall(KSPGetPC(ksp, &pc));
      PetscCall(PCGAMGASMSetHEM(pc, 3)); // code coverage
    }
    /* test BCs */
    PetscCall(VecZeroEntries(xx));
    if (test_nonzero_cols) {
      if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES));
      PetscCall(VecAssemblyBegin(xx));
      PetscCall(VecAssemblyEnd(xx));
    }
    PetscCall(VecZeroEntries(bb));
    PetscCall(VecGetSize(bb, &i));
    sizes[iter] = i;
    PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim));
    PetscCall(PetscLogStagePop());
    /* solve */
    PetscCall(SNESComputeJacobian(snes, xx, Amat, Amat));
    PetscCall(MatViewFromOptions(Amat, NULL, "-my_mat_view"));
    PetscCall(PetscLogStagePush(stage[iter]));
    PetscCall(SNESSolve(snes, bb, xx));
    PetscCall(PetscLogStagePop());
    PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter]));
    {
      PetscViewer       viewer = NULL;
      PetscViewerFormat fmt;
      PetscCall(PetscOptionsCreateViewer(comm, NULL, "", "-vec_view", &viewer, &fmt, &flg));
      if (flg) {
        PetscCall(PetscViewerPushFormat(viewer, fmt));
        PetscCall(VecView(xx, viewer));
        PetscCall(VecView(bb, viewer));
        PetscCall(PetscViewerPopFormat(viewer));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
    /* Free work space */
    PetscCall(SNESDestroy(&snes));
    PetscCall(VecDestroy(&xx));
    PetscCall(VecDestroy(&bb));
    PetscCall(MatDestroy(&Amat));
    if (iter + 1 < max_conv_its) {
      DM newdm;
      PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
      PetscCall(DMRefine(dm, comm, &newdm));
      if (rank == -1) { /* never true; debugging output, enable by hand */
        PetscDS prob;
        PetscCall(DMGetDS(dm, &prob));
        PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
        PetscCall(DMGetDS(newdm, &prob));
        PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
      }
      PetscCall(DMDestroy(&dm));
      dm = newdm;
      PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
      PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
      PetscCall(DMSetFromOptions(dm));
    }
  }
  PetscCall(DMDestroy(&dm));
  if (run_type == 1) err[0] = 5.97537599375e+01 - mdisp[0]; /* error relative to a reference value taken as the exact solution */
  else if (run_type == 0) err[0] = 0;
  else if (run_type == 2) err[0] = 3.527795e+01 - mdisp[0];
  else err[0] = 0;
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %d) N=%12" PetscInt_FMT ", max displ=%9.7e, error=%4.3e\n", rank, 0, sizes[0], (double)mdisp[0], (double)err[0]));
  for (iter = 1; iter < max_conv_its; iter++) {
    if (run_type == 1) err[iter] = 5.97537599375e+01 - mdisp[iter];
    else if (run_type == 0) err[iter] = 0;
    else if (run_type == 2) err[iter] = 3.527795e+01 - mdisp[iter];
    else err[iter] = 0;
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(PetscAbs(err[iter - 1] / err[iter])) / PetscLogReal(2.))));
  }

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

  testset:
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -petscspace_degree 2 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.001 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -my_dm_view -snes_lag_jacobian -2 -snes_type ksponly -pc_gamg_mis_k_minimum_degree_ordering true -pc_gamg_low_memory_threshold_filter
    timeoutfactor: 2
    test:
      suffix: 0
      args: -run_type 1 -max_conv_its 3 -pc_gamg_mat_coarsen_type hem -pc_gamg_mat_coarsen_max_it 5 -pc_gamg_asm_hem_aggs 4 -ksp_rtol 1.e-6
      filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 7/Linear solve converged due to CONVERGED_RTOL iterations 8/g"
    test:
      suffix: 1
      filter: grep -v HERMITIAN
      args: -run_type 2 -max_conv_its 2 -use_mat_nearnullspace false -snes_view

  test:
    nsize: 1
    requires: !single
    suffix: 2
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 1 -ksp_type cg -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_aggressive_coarsening 1 -ksp_converged_reason -use_mat_nearnullspace true -my_dm_view -snes_type ksponly
    timeoutfactor: 2

  # HYPRE PtAP broken with complex numbers
  test:
    suffix: hypre
    requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
    nsize: 4
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple

  test:
    suffix: ml
    requires: ml !single
    nsize: 4
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace

  test:
    suffix: hpddm
    requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
    nsize: 4
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd

  test:
    suffix: repart
    nsize: 4
    requires: parmetis !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -pc_gamg_reuse_interpolation true -petscpartitioner_type simple

  test:
    suffix: bddc
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type {{sbaij baij aij}} -pc_type bddc

  testset:
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output}
    test:
      suffix: bddc_approx_gamg
      args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
    # HYPRE PtAP broken with complex numbers
    test:
      requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
      suffix: bddc_approx_hypre
      args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop
    test:
      requires: ml
      suffix: bddc_approx_ml
      args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop

  test:
    suffix: fetidp
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type {{sbaij baij aij}}

  test:
    suffix: bddc_elast
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace

  test:
    suffix: fetidp_elast
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -mat_is_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace

  test:
    suffix: gdsw
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -attach_mat_nearnullspace \
          -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc

  testset:
    nsize: 4
    requires: !single
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_monitor_short -dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 40
    output_file: output/ex56_cuda.out

    test:
      suffix: cuda
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda

    test:
      suffix: hip
      requires: hip
      args: -dm_mat_type aijhipsparse -dm_vec_type hip

    test:
      suffix: viennacl
      requires: viennacl
      args: -dm_mat_type aijviennacl -dm_vec_type viennacl

    test:
      suffix: kokkos
      requires: kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
  # Don't run AIJMKL cases with complex scalars because of convergence issues.
  # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation.
  test:
    suffix: seqaijmkl
    nsize: 1
    requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
    timeoutfactor: 2

  test:
    suffix: mpiaijmkl
    nsize: 4
    requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
    timeoutfactor: 2

TEST*/