Actual source code: ex59.c

  1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
  2: Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
  3: Spectral degree can be specified by passing a value to the -p option.\n\
  4: The global problem has either Dirichlet boundary conditions on one side or pure Neumann boundary conditions (depending on runtime parameters).\n\
  5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez], where ne_ is the number of elements in the _ direction.\n\
  6: Example usage: \n\
  7: 1D: mpiexec -n 4 ex59 -nex 7\n\
  8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
  9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
 10: Subdomain decomposition can be specified with -np_ parameters.\n\
 11: Dirichlet boundaries on one side by default:\n\
 12: by default it does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
 13: The pure Neumann case can be requested by passing in -pureneumann.\n\
 14: In the latter case, to avoid runtime errors during factorization, also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";

 16: #include <petscksp.h>
 17: #include <petscpc.h>
 18: #include <petscdm.h>
 19: #include <petscdmda.h>
 20: #include <petscblaslapack.h>
 21: #define DEBUG 0

 23: /* structure holding domain data */
 24: typedef struct {
 25:   /* communicator */
 26:   MPI_Comm gcomm;
 27:   /* space dimension */
 28:   PetscInt dim;
 29:   /* spectral degree */
 30:   PetscInt p;
 31:   /* subdomains per dimension */
 32:   PetscInt npx, npy, npz;
 33:   /* subdomain index in cartesian dimensions */
 34:   PetscInt ipx, ipy, ipz;
 35:   /* elements per dimension */
 36:   PetscInt nex, ney, nez;
 37:   /* local elements per dimension */
 38:   PetscInt nex_l, ney_l, nez_l;
 39:   /* global number of dofs per dimension */
 40:   PetscInt xm, ym, zm;
 41:   /* local number of dofs per dimension */
 42:   PetscInt xm_l, ym_l, zm_l;
 43:   /* starting global indexes for subdomain in lexicographic ordering */
 44:   PetscInt startx, starty, startz;
 45:   /* pure Neumann problem */
 46:   PetscBool pure_neumann;
 47:   /* Dirichlet BC implementation */
 48:   PetscBool DBC_zerorows;
 49:   /* Scaling factor for subdomain */
 50:   PetscScalar scalingfactor;
 51:   PetscBool   testkspfetidp;
 52: } DomainData;

 54: /* structure holding GLL data */
 55: typedef struct {
 56:   /* GLL nodes */
 57:   PetscReal *zGL;
 58:   /* GLL weights */
 59:   PetscScalar *rhoGL;
 60:   /* aux_mat */
 61:   PetscScalar **A;
 62:   /* Element matrix */
 63:   Mat elem_mat;
 64: } GLLData;

 66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
 67: {
 68:   PetscInt *xadj_temp, *adjncy_temp;
 69:   PetscInt  i, j, k, ii, jj, kk, iindex, count_adj;
 70:   PetscInt  istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
 71:   PetscBool internal_node;

 73:   /* first count dimension of adjncy */
 74:   PetscFunctionBeginUser;
 75:   count_adj = 0;
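       /* The graph is stored in CSR form (xadj row pointers, adjncy column indices) using the subdomain-local
          lexicographic numbering; it is later passed to PCBDDCSetLocalAdjacencyGraph so that PCBDDC can analyze
          the connectivity of the local dofs */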
 76:   for (k = 0; k < dd.zm_l; k++) {
 77:     internal_node = PETSC_TRUE;
 78:     kstart_csr    = k > 0 ? k - 1 : k;
 79:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
 80:     if (k == 0 || k == dd.zm_l - 1) {
 81:       internal_node = PETSC_FALSE;
 82:       kstart_csr    = k;
 83:       kend_csr      = k + 1;
 84:     }
 85:     for (j = 0; j < dd.ym_l; j++) {
 86:       jstart_csr = j > 0 ? j - 1 : j;
 87:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
 88:       if (j == 0 || j == dd.ym_l - 1) {
 89:         internal_node = PETSC_FALSE;
 90:         jstart_csr    = j;
 91:         jend_csr      = j + 1;
 92:       }
 93:       for (i = 0; i < dd.xm_l; i++) {
 94:         istart_csr = i > 0 ? i - 1 : i;
 95:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
 96:         if (i == 0 || i == dd.xm_l - 1) {
 97:           internal_node = PETSC_FALSE;
 98:           istart_csr    = i;
 99:           iend_csr      = i + 1;
100:         }
101:         if (internal_node) {
102:           istart_csr = i;
103:           iend_csr   = i + 1;
104:           jstart_csr = j;
105:           jend_csr   = j + 1;
106:           kstart_csr = k;
107:           kend_csr   = k + 1;
108:         }
109:         for (kk = kstart_csr; kk < kend_csr; kk++) {
110:           for (jj = jstart_csr; jj < jend_csr; jj++) {
111:             for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112:           }
113:         }
114:       }
115:     }
116:   }
117:   PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118:   PetscCall(PetscMalloc1(count_adj, &adjncy_temp));

120:   /* now fill CSR data structure */
121:   count_adj = 0;
122:   for (k = 0; k < dd.zm_l; k++) {
123:     internal_node = PETSC_TRUE;
124:     kstart_csr    = k > 0 ? k - 1 : k;
125:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
126:     if (k == 0 || k == dd.zm_l - 1) {
127:       internal_node = PETSC_FALSE;
128:       kstart_csr    = k;
129:       kend_csr      = k + 1;
130:     }
131:     for (j = 0; j < dd.ym_l; j++) {
132:       jstart_csr = j > 0 ? j - 1 : j;
133:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
134:       if (j == 0 || j == dd.ym_l - 1) {
135:         internal_node = PETSC_FALSE;
136:         jstart_csr    = j;
137:         jend_csr      = j + 1;
138:       }
139:       for (i = 0; i < dd.xm_l; i++) {
140:         istart_csr = i > 0 ? i - 1 : i;
141:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
142:         if (i == 0 || i == dd.xm_l - 1) {
143:           internal_node = PETSC_FALSE;
144:           istart_csr    = i;
145:           iend_csr      = i + 1;
146:         }
147:         iindex            = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148:         xadj_temp[iindex] = count_adj;
149:         if (internal_node) {
150:           istart_csr = i;
151:           iend_csr   = i + 1;
152:           jstart_csr = j;
153:           jend_csr   = j + 1;
154:           kstart_csr = k;
155:           kend_csr   = k + 1;
156:         }
157:         for (kk = kstart_csr; kk < kend_csr; kk++) {
158:           for (jj = jstart_csr; jj < jend_csr; jj++) {
159:             for (ii = istart_csr; ii < iend_csr; ii++) {
160:               iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;

162:               adjncy_temp[count_adj] = iindex;
163:               count_adj              = count_adj + 1;
164:             }
165:           }
166:         }
167:       }
168:     }
169:   }
170:   xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;

172:   *xadj   = xadj_temp;
173:   *adjncy = adjncy_temp;
174:   PetscFunctionReturn(PETSC_SUCCESS);
175: }

177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179:   IS         temp_dirichlet = 0, temp_neumann = 0;
180:   PetscInt   localsize, i, j, k, *indices;
181:   PetscBool *touched;

183:   PetscFunctionBeginUser;
184:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

186:   PetscCall(PetscMalloc1(localsize, &indices));
187:   PetscCall(PetscMalloc1(localsize, &touched));
188:   for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
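       /* 'touched' prevents listing the same local dof twice: corner and edge dofs are shared by several
          boundary faces, and dofs already inserted in the Dirichlet set must not reappear in the Neumann set */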

190:   if (dirichlet) {
191:     i = 0;
192:     /* west boundary */
193:     if (dd.ipx == 0) {
194:       for (k = 0; k < dd.zm_l; k++) {
195:         for (j = 0; j < dd.ym_l; j++) {
196:           indices[i]          = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197:           touched[indices[i]] = PETSC_TRUE;
198:           i++;
199:         }
200:       }
201:     }
202:     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203:   }
204:   if (neumann) {
205:     i = 0;
206:     /* west boundary */
207:     if (dd.ipx == 0) {
208:       for (k = 0; k < dd.zm_l; k++) {
209:         for (j = 0; j < dd.ym_l; j++) {
210:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211:           if (!touched[indices[i]]) {
212:             touched[indices[i]] = PETSC_TRUE;
213:             i++;
214:           }
215:         }
216:       }
217:     }
218:     /* east boundary */
219:     if (dd.ipx == dd.npx - 1) {
220:       for (k = 0; k < dd.zm_l; k++) {
221:         for (j = 0; j < dd.ym_l; j++) {
222:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223:           if (!touched[indices[i]]) {
224:             touched[indices[i]] = PETSC_TRUE;
225:             i++;
226:           }
227:         }
228:       }
229:     }
230:     /* south boundary */
231:     if (dd.ipy == 0 && dd.dim > 1) {
232:       for (k = 0; k < dd.zm_l; k++) {
233:         for (j = 0; j < dd.xm_l; j++) {
234:           indices[i] = k * dd.ym_l * dd.xm_l + j;
235:           if (!touched[indices[i]]) {
236:             touched[indices[i]] = PETSC_TRUE;
237:             i++;
238:           }
239:         }
240:       }
241:     }
242:     /* north boundary */
243:     if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244:       for (k = 0; k < dd.zm_l; k++) {
245:         for (j = 0; j < dd.xm_l; j++) {
246:           indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247:           if (!touched[indices[i]]) {
248:             touched[indices[i]] = PETSC_TRUE;
249:             i++;
250:           }
251:         }
252:       }
253:     }
254:     /* bottom boundary */
255:     if (dd.ipz == 0 && dd.dim > 2) {
256:       for (k = 0; k < dd.ym_l; k++) {
257:         for (j = 0; j < dd.xm_l; j++) {
258:           indices[i] = k * dd.xm_l + j;
259:           if (!touched[indices[i]]) {
260:             touched[indices[i]] = PETSC_TRUE;
261:             i++;
262:           }
263:         }
264:       }
265:     }
266:     /* top boundary */
267:     if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268:       for (k = 0; k < dd.ym_l; k++) {
269:         for (j = 0; j < dd.xm_l; j++) {
270:           indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271:           if (!touched[indices[i]]) {
272:             touched[indices[i]] = PETSC_TRUE;
273:             i++;
274:           }
275:         }
276:       }
277:     }
278:     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279:   }
280:   if (dirichlet) *dirichlet = temp_dirichlet;
281:   if (neumann) *neumann = temp_neumann;
282:   PetscCall(PetscFree(indices));
283:   PetscCall(PetscFree(touched));
284:   PetscFunctionReturn(PETSC_SUCCESS);
285: }

287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289:   DM                     da;
290:   AO                     ao;
291:   DMBoundaryType         bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292:   DMDAStencilType        stype = DMDA_STENCIL_BOX;
293:   ISLocalToGlobalMapping temp_isg2lmap;
294:   PetscInt               i, j, k, ig, jg, kg, lindex, gindex, localsize;
295:   PetscInt              *global_indices;

297:   PetscFunctionBeginUser;
298:   /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299:      just to illustrate the creation of a MATIS object */
300:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301:   PetscCall(PetscMalloc1(localsize, &global_indices));
302:   for (k = 0; k < dd.zm_l; k++) {
303:     kg = dd.startz + k;
304:     for (j = 0; j < dd.ym_l; j++) {
305:       jg = dd.starty + j;
306:       for (i = 0; i < dd.xm_l; i++) {
307:         ig                     = dd.startx + i;
308:         lindex                 = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309:         gindex                 = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310:         global_indices[lindex] = gindex;
311:       }
312:     }
313:   }
314:   if (dd.dim == 3) {
315:     PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316:   } else if (dd.dim == 2) {
317:     PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318:   } else {
319:     PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320:   }
321:   PetscCall(DMSetFromOptions(da));
322:   PetscCall(DMSetUp(da));
323:   PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324:   PetscCall(DMDAGetAO(da, &ao));
325:   PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
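       /* AOApplicationToPetsc converts the lexicographic (natural) global indices computed above into the
          PETSc ordering of the DMDA; this is the numbering exposed by the local-to-global map created below */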
326:   PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327:   PetscCall(DMDestroy(&da));
328:   *isg2lmap = temp_isg2lmap;
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333:   PetscInt     localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334:   PetscInt     ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335:   PetscInt    *indexg, *cols, *colsg;
336:   PetscScalar *vals;
337:   Mat          temp_local_mat, elem_mat_DBC = 0, *usedmat;
338:   IS           submatIS;

340:   PetscFunctionBeginUser;
341:   PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342:   PetscCall(PetscMalloc1(i, &indexg));
343:   PetscCall(PetscMalloc1(i, &colsg));
344:   /* get submatrix of elem_mat without dirichlet nodes */
345:   if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346:     xloc = dd.p + 1;
347:     yloc = 1;
348:     zloc = 1;
349:     if (dd.dim > 1) yloc = dd.p + 1;
350:     if (dd.dim > 2) zloc = dd.p + 1;
351:     ii = 0;
352:     for (k = 0; k < zloc; k++) {
353:       for (j = 0; j < yloc; j++) {
354:         for (i = 1; i < xloc; i++) {
355:           indexg[ii] = k * xloc * yloc + j * xloc + i;
356:           ii++;
357:         }
358:       }
359:     }
360:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361:     PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362:     PetscCall(ISDestroy(&submatIS));
363:   }

365:   /* Assemble subdomain matrix */
366:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367:   PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368:   PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369:   PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
370:   /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
371:   /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
372:   if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373:     PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374:   } else {
375:     PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376:   }
377:   PetscCall(MatSetFromOptions(temp_local_mat));

379:   i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
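       /* crude upper bound on the nonzeros per row: a dof couples with at most (2p+1)^dim dofs coming from
          its (at most) 2^dim neighboring elements, and (2p+1)^dim <= (3(p+1))^dim */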

381:   PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL));      /* very overestimated */
382:   PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383:   PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL));  /* very overestimated */
384:   PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));

386:   yloc = dd.p + 1;
387:   zloc = dd.p + 1;
388:   if (dd.dim < 3) zloc = 1;
389:   if (dd.dim < 2) yloc = 1;

391:   auxnez = dd.nez_l;
392:   auxney = dd.ney_l;
393:   auxnex = dd.nex_l;
394:   if (dd.dim < 3) auxnez = 1;
395:   if (dd.dim < 2) auxney = 1;

397:   for (ke = 0; ke < auxnez; ke++) {
398:     for (je = 0; je < auxney; je++) {
399:       for (ie = 0; ie < auxnex; ie++) {
400:         /* customize the element matrix to account for the boundary conditions */
401:         xloc    = dd.p + 1;
402:         ming    = 0;
403:         usedmat = &glldata.elem_mat;
404:         if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405:           if (ie == 0) {
406:             xloc    = dd.p;
407:             usedmat = &elem_mat_DBC;
408:           } else {
409:             ming    = -1;
410:             usedmat = &glldata.elem_mat;
411:           }
412:         }
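             /* when Dirichlet dofs are eliminated (default, no -usezerorows) on a subdomain touching the Dirichlet
                boundary, the first x-layer of dofs is absent from the local numbering: the first element uses the
                reduced matrix elem_mat_DBC, while the other elements shift their x index by ming = -1 */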
413:         /* map indices local to the element to indices global to the subdomain */
414:         for (k = 0; k < zloc; k++) {
415:           kg = ke * dd.p + k;
416:           for (j = 0; j < yloc; j++) {
417:             jg = je * dd.p + j;
418:             for (i = 0; i < xloc; i++) {
419:               ig         = ie * dd.p + i + ming;
420:               ii         = k * xloc * yloc + j * xloc + i;
421:               indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422:             }
423:           }
424:         }
425:         /* Set values */
426:         for (i = 0; i < xloc * yloc * zloc; i++) {
427:           PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428:           for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429:           PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430:           PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431:         }
432:       }
433:     }
434:   }
435:   PetscCall(PetscFree(indexg));
436:   PetscCall(PetscFree(colsg));
437:   PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438:   PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439: #if DEBUG
440:   {
441:     Vec       lvec, rvec;
442:     PetscReal norm;
443:     PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444:     PetscCall(VecSet(lvec, 1.0));
445:     PetscCall(MatMult(temp_local_mat, lvec, rvec));
446:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447:     PetscCall(VecDestroy(&lvec));
448:     PetscCall(VecDestroy(&rvec));
449:   }
450: #endif
451:   *local_mat = temp_local_mat;
452:   PetscCall(MatDestroy(&elem_mat_DBC));
453:   PetscFunctionReturn(PETSC_SUCCESS);
454: }

456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458:   PetscReal   *M, si;
459:   PetscScalar  x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460:   PetscBLASInt pm1, lierr;
461:   PetscInt     i, j, n, k, s, r, q, ii, jj, p = dd.p;
462:   PetscInt     xloc, yloc, zloc, xyloc, xyzloc;

464:   PetscFunctionBeginUser;
465:   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466:   PetscCall(PetscCalloc1(p + 1, &glldata->zGL));

468:   glldata->zGL[0] = -1.0;
469:   glldata->zGL[p] = 1.0;
470:   if (p > 1) {
471:     if (p == 2) glldata->zGL[1] = 0.0;
472:     else {
473:       PetscCall(PetscMalloc1(p - 1, &M));
474:       for (i = 0; i < p - 1; i++) {
475:         si   = (PetscReal)(i + 1.0);
476:         M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477:       }
478:       pm1 = p - 1;
479:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480:       PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
481:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR Lapack routine %d", (int)lierr);
482:       PetscCall(PetscFPTrapPop());
483:       PetscCall(PetscFree(M));
484:     }
485:   }
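       /* the interior GLL nodes computed above are the zeros of P_p'; they are obtained, Golub-Welsch style,
          as the eigenvalues of a symmetric tridiagonal matrix (zero diagonal, off-diagonal entries in M)
          computed by LAPACK steqr */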

487:   /* Weights for 1D quadrature */
488:   PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));

490:   glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491:   glldata->rhoGL[p] = glldata->rhoGL[0];
492:   z2                = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
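       /* three-term Legendre recurrence: after the inner loop below z2 = P_p(x), and the GLL weight at an
          interior node z_i is 2/(p*(p+1)*P_p(z_i)^2) */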
493:   for (i = 1; i < p; i++) {
494:     x  = glldata->zGL[i];
495:     z0 = 1.0;
496:     z1 = x;
497:     for (n = 1; n < p; n++) {
498:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499:       z0 = z1;
500:       z1 = z2;
501:     }
502:     glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503:   }

505:   /* Auxiliary mat for laplacian */
506:   PetscCall(PetscMalloc1(p + 1, &glldata->A));
507:   PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508:   for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;

510:   for (j = 1; j < p; j++) {
511:     x  = glldata->zGL[j];
512:     z0 = 1.0;
513:     z1 = x;
514:     for (n = 1; n < p; n++) {
515:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516:       z0 = z1;
517:       z1 = z2;
518:     }
519:     Lpj = z2;
520:     for (r = 1; r < p; r++) {
521:       if (r == j) {
522:         glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523:       } else {
524:         x  = glldata->zGL[r];
525:         z0 = 1.0;
526:         z1 = x;
527:         for (n = 1; n < p; n++) {
528:           z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529:           z0 = z1;
530:           z1 = z2;
531:         }
532:         Lpr              = z2;
533:         glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534:       }
535:     }
536:   }
537:   for (j = 1; j < p + 1; j++) {
538:     x  = glldata->zGL[j];
539:     z0 = 1.0;
540:     z1 = x;
541:     for (n = 1; n < p; n++) {
542:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543:       z0 = z1;
544:       z1 = z2;
545:     }
546:     Lpj              = z2;
547:     glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548:     glldata->A[0][j] = glldata->A[j][0];
549:   }
550:   for (j = 0; j < p; j++) {
551:     x  = glldata->zGL[j];
552:     z0 = 1.0;
553:     z1 = x;
554:     for (n = 1; n < p; n++) {
555:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556:       z0 = z1;
557:       z1 = z2;
558:     }
559:     Lpj = z2;

561:     glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562:     glldata->A[j][p] = glldata->A[p][j];
563:   }
564:   glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565:   glldata->A[p][p] = glldata->A[0][0];
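       /* the auxiliary matrix A collects the 1D contributions on the GLL nodes; the dim-dimensional element
          matrix assembled below is built from it by tensor products with the GLL quadrature weights */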

567:   /* compute element matrix */
568:   xloc = p + 1;
569:   yloc = p + 1;
570:   zloc = p + 1;
571:   if (dd.dim < 2) yloc = 1;
572:   if (dd.dim < 3) zloc = 1;
573:   xyloc  = xloc * yloc;
574:   xyzloc = xloc * yloc * zloc;

576:   PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577:   PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578:   PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579:   PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580:   PetscCall(MatZeroEntries(glldata->elem_mat));
581:   PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));

583:   for (k = 0; k < zloc; k++) {
584:     if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585:     else rhoGLk = 1.0;

587:     for (j = 0; j < yloc; j++) {
588:       if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589:       else rhoGLj = 1.0;

591:       for (i = 0; i < xloc; i++) {
592:         ii = k * xyloc + j * xloc + i;
593:         s  = k;
594:         r  = j;
595:         for (q = 0; q < xloc; q++) {
596:           jj = s * xyloc + r * xloc + q;
597:           PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598:         }
599:         if (dd.dim > 1) {
600:           s = k;
601:           q = i;
602:           for (r = 0; r < yloc; r++) {
603:             jj = s * xyloc + r * xloc + q;
604:             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605:           }
606:         }
607:         if (dd.dim > 2) {
608:           r = j;
609:           q = i;
610:           for (s = 0; s < zloc; s++) {
611:             jj = s * xyloc + r * xloc + q;
612:             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613:           }
614:         }
615:       }
616:     }
617:   }
618:   PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619:   PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620: #if DEBUG
621:   {
622:     Vec       lvec, rvec;
623:     PetscReal norm;
624:     PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625:     PetscCall(VecSet(lvec, 1.0));
626:     PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628:     PetscCall(VecDestroy(&lvec));
629:     PetscCall(VecDestroy(&rvec));
630:   }
631: #endif
632:   PetscFunctionReturn(PETSC_SUCCESS);
633: }

635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637:   PetscMPIInt rank;
638:   PetscInt    i, j, k;

640:   PetscFunctionBeginUser;
641:   /* Subdomain index in cartesian coordinates */
642:   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
643:   dd->ipx = rank % dd->npx;
644:   if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645:   else dd->ipz = 0;

647:   dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
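       /* ranks are arranged lexicographically in the processor grid: x fastest, then y, then z */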
648:   /* number of local elements */
649:   dd->nex_l = dd->nex / dd->npx;
650:   if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651:   if (dd->dim > 1) {
652:     dd->ney_l = dd->ney / dd->npy;
653:     if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654:   } else {
655:     dd->ney_l = 0;
656:   }
657:   if (dd->dim > 2) {
658:     dd->nez_l = dd->nez / dd->npz;
659:     if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660:   } else {
661:     dd->nez_l = 0;
662:   }
663:   /* local and global number of dofs */
664:   dd->xm_l = dd->nex_l * dd->p + 1;
665:   dd->xm   = dd->nex * dd->p + 1;
666:   dd->ym_l = dd->ney_l * dd->p + 1;
667:   dd->ym   = dd->ney * dd->p + 1;
668:   dd->zm_l = dd->nez_l * dd->p + 1;
669:   dd->zm   = dd->nez * dd->p + 1;
670:   if (!dd->pure_neumann) {
671:     if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672:     if (!dd->DBC_zerorows) dd->xm--;
673:   }
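       /* when Dirichlet dofs are eliminated from the system (default, i.e. no -usezerorows), the layer of dofs
          on the Dirichlet side is dropped: globally from xm, and locally only on the subdomains with ipx == 0 */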

675:   /* starting global index for local dofs (simple lexicographic order) */
676:   dd->startx = 0;
677:   j          = dd->nex / dd->npx;
678:   for (i = 0; i < dd->ipx; i++) {
679:     k = j;
680:     if (i < dd->nex % dd->npx) k++;
681:     dd->startx = dd->startx + k * dd->p;
682:   }
683:   if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

685:   dd->starty = 0;
686:   if (dd->dim > 1) {
687:     j = dd->ney / dd->npy;
688:     for (i = 0; i < dd->ipy; i++) {
689:       k = j;
690:       if (i < dd->ney % dd->npy) k++;
691:       dd->starty = dd->starty + k * dd->p;
692:     }
693:   }
694:   dd->startz = 0;
695:   if (dd->dim > 2) {
696:     j = dd->nez / dd->npz;
697:     for (i = 0; i < dd->ipz; i++) {
698:       k = j;
699:       if (i < dd->nez % dd->npz) k++;
700:       dd->startz = dd->startz + k * dd->p;
701:     }
702:   }
703:   PetscFunctionReturn(PETSC_SUCCESS);
704: }
705: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
706: {
707:   GLLData                gll;
708:   Mat                    local_mat = 0, temp_A = 0;
709:   ISLocalToGlobalMapping matis_map   = 0;
710:   IS                     dirichletIS = 0;

712:   PetscFunctionBeginUser;
713:   /* Compute data for the Gauss-Lobatto-Legendre quadrature rule */
714:   PetscCall(GLLStuffs(dd, &gll));
715:   /* Compute matrix of subdomain Neumann problem */
716:   PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
717:   /* Compute global mapping of local dofs */
718:   PetscCall(ComputeMapping(dd, &matis_map));
719:   /* Create MATIS object needed by BDDC */
720:   PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
721:   /* Set local subdomain matrices into MATIS object */
722:   PetscCall(MatScale(local_mat, dd.scalingfactor));
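       /* scale the local matrix by the per-subdomain factor set in InitializeDomainData (-jump option),
          mimicking a coefficient that jumps across subdomain interfaces */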
723:   PetscCall(MatISSetLocalMat(temp_A, local_mat));
724:   /* Call assembly functions */
725:   PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
726:   PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));

728:   if (dd.DBC_zerorows) {
729:     PetscInt dirsize;

731:     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
732:     PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
733:     PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
734:     PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
735:     PetscCall(ISDestroy(&dirichletIS));
736:   }

738:   /* giving hints about the local and global matrices can be useful to the BDDC preconditioner */
739:   PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
740:   PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
741: #if DEBUG
742:   {
743:     Vec       lvec, rvec;
744:     PetscReal norm;
745:     PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
746:     PetscCall(VecSet(lvec, 1.0));
747:     PetscCall(MatMult(temp_A, lvec, rvec));
748:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
749:     PetscCall(VecDestroy(&lvec));
750:     PetscCall(VecDestroy(&rvec));
751:   }
752: #endif
753:   /* free allocated workspace */
754:   PetscCall(PetscFree(gll.zGL));
755:   PetscCall(PetscFree(gll.rhoGL));
756:   PetscCall(PetscFree(gll.A[0]));
757:   PetscCall(PetscFree(gll.A));
758:   PetscCall(MatDestroy(&gll.elem_mat));
759:   PetscCall(MatDestroy(&local_mat));
760:   PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
761:   /* give back the pointer to the MATIS object */
762:   *A = temp_A;
763:   PetscFunctionReturn(PETSC_SUCCESS);
764: }

766: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
767: {
768:   KSP temp_ksp;
769:   PC  pc;

771:   PetscFunctionBeginUser;
772:   PetscCall(KSPGetPC(ksp_bddc, &pc));
773:   if (!dd.testkspfetidp) {
774:     PC  D;
775:     Mat F;

777:     PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
778:     PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
779:     PetscCall(KSPSetOperators(temp_ksp, F, F));
780:     PetscCall(KSPSetType(temp_ksp, KSPCG));
781:     PetscCall(KSPSetPC(temp_ksp, D));
782:     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
783:     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
784:     PetscCall(KSPSetFromOptions(temp_ksp));
785:     PetscCall(KSPSetUp(temp_ksp));
786:     PetscCall(MatDestroy(&F));
787:     PetscCall(PCDestroy(&D));
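         /* the KSP holds its own references to F and D, so the local references can be dropped here */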
788:   } else {
789:     Mat A, Ap;

791:     PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
792:     PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
793:     PetscCall(KSPSetOperators(temp_ksp, A, Ap));
794:     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
795:     PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
796:     PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
797:     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
798:     PetscCall(KSPSetFromOptions(temp_ksp));
799:     PetscCall(KSPSetUp(temp_ksp));
800:   }
801:   *ksp_fetidp = temp_ksp;
802:   PetscFunctionReturn(PETSC_SUCCESS);
803: }

805: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
806: {
807:   KSP          temp_ksp;
808:   PC           pc;
809:   IS           primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
810:   PetscInt     vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
811:   MatNullSpace near_null_space;

813:   PetscFunctionBeginUser;
814:   PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
815:   PetscCall(KSPSetOperators(temp_ksp, A, A));
816:   PetscCall(KSPSetType(temp_ksp, KSPCG));
817:   PetscCall(KSPGetPC(temp_ksp, &pc));
818:   PetscCall(PCSetType(pc, PCBDDC));

820:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

822:   /* BDDC customization */

824:   /* jumping coefficients case */
825:   PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));

827:   /* Dofs splitting
828:      Simple stride-1 IS
829:      It is not needed since, by default, PCBDDC assumes a stride-1 split */
830:   PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
831: #if 1
832:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
833:   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
834: #else
835:   /* examples for global ordering */

837:   /* each process lists the nodes it owns */
838:   PetscInt sr, er;
839:   PetscCall(MatGetOwnershipRange(A, &sr, &er));
840:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
841:   PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
842:   /* Split can be passed in a more general way since any process can list any node */
843: #endif
844:   PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
845:   PetscCall(PetscFree(bddc_dofs_splitting));

847:   /* Primal constraints implemented by using a near-null space attached to A -> here we pass in only the constants
848:     (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
849: #if 0
850:   Vec vecs[2];
851:   PetscRandom rctx;
852:   PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
853:   PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
854:   PetscCall(VecSetRandom(vecs[0],rctx));
855:   PetscCall(VecSetRandom(vecs[1],rctx));
856:   PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
857:   PetscCall(VecDestroy(&vecs[0]));
858:   PetscCall(VecDestroy(&vecs[1]));
859:   PetscCall(PetscRandomDestroy(&rctx));
860: #else
861:   PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
862: #endif
863:   PetscCall(MatSetNearNullSpace(A, near_null_space));
864:   PetscCall(MatNullSpaceDestroy(&near_null_space));

866:   /* CSR graph of subdomain dofs */
867:   PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
868:   PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));
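       /* with PETSC_OWN_POINTER the preconditioner takes ownership of xadj and adjncy and will free them */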

870:   /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
871:   vidx[0] = 0 * dd.xm_l + 0;
872:   vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
873:   vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
874:   vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
875:   vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
876:   vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
877:   vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
878:   vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
879:   PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
880:   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
881:   PetscCall(ISDestroy(&primals));

883:   /* Neumann/Dirichlet indices on the global boundary */
884:   if (dd.DBC_zerorows) {
885:     /* Only when some rows of the matrix have been eliminated with the zerorows function do you need to set the Dirichlet indices in the PCBDDC data */
886:     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
887:     PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
888:     PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
889:   } else {
890:     if (dd.pure_neumann) {
891:       /* In such a case, all interface nodes lying on the global boundary are neumann nodes */
892:       PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
893:       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
894:     } else {
895:       /* It would be wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
896:       /* But we can still compute them, since nodes near the Dirichlet boundary do not need to be defined as Neumann nodes */
897:       PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
898:       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
899:     }
900:   }

902:   /* Pass local null space information to local matrices (needed when using approximate local solvers) */
903:   if (dd.ipx || dd.pure_neumann) {
904:     MatNullSpace nsp;
905:     Mat          local_mat;

907:     PetscCall(MatISGetLocalMat(A, &local_mat));
908:     PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
909:     PetscCall(MatSetNullSpace(local_mat, nsp));
910:     PetscCall(MatNullSpaceDestroy(&nsp));
911:   }
912:   PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
913:   PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
914:   PetscCall(KSPSetFromOptions(temp_ksp));
915:   PetscCall(KSPSetUp(temp_ksp));
916:   *ksp = temp_ksp;
917:   PetscCall(ISDestroy(&dirichletIS));
918:   PetscCall(ISDestroy(&neumannIS));
919:   PetscFunctionReturn(PETSC_SUCCESS);
920: }

922: static PetscErrorCode InitializeDomainData(DomainData *dd)
923: {
924:   PetscMPIInt sizes, rank;
925:   PetscInt    factor;

927:   PetscFunctionBeginUser;
928:   dd->gcomm = PETSC_COMM_WORLD;
929:   PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
930:   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
931:   /* Get information from command line */
932:   /* Processors/subdomains per dimension */
933:   /* Default is 1d problem */
934:   dd->npx = sizes;
935:   dd->npy = 0;
936:   dd->npz = 0;
937:   dd->dim = 1;
938:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
939:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
940:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
941:   if (dd->npy) dd->dim++;
942:   if (dd->npz) dd->dim++;
943:   /* Number of elements per dimension */
944:   /* Default is one element per subdomain */
945:   dd->nex = dd->npx;
946:   dd->ney = dd->npy;
947:   dd->nez = dd->npz;
948:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
949:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
950:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
951:   if (!dd->npy) {
952:     dd->ney = 0;
953:     dd->nez = 0;
954:   }
955:   if (!dd->npz) dd->nez = 0;
956:   /* Spectral degree */
957:   dd->p = 3;
958:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
959:   /* pure neumann problem? */
960:   dd->pure_neumann = PETSC_FALSE;
961:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));

963:   /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
964:   dd->DBC_zerorows = PETSC_FALSE;

966:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
967:   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
968:   dd->scalingfactor = 1.0;

970:   factor = 0;
971:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
972:   /* checkerboard pattern */
973:   dd->scalingfactor = PetscPowScalar(10.0, (PetscScalar)factor * PetscPowScalar(-1.0, (PetscScalar)rank));
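       /* even ranks get a factor of 10^jump and odd ranks 10^-jump; with the default -jump 0 the coefficient is 1 everywhere */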
974:   /* validate the data passed in */
975:   if (dd->dim == 1) {
976:     PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
977:     PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
978:   } else if (dd->dim == 2) {
979:     PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
980:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
981:   } else {
982:     PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
983:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
984:   }
985:   PetscFunctionReturn(PETSC_SUCCESS);
986: }

988: int main(int argc, char **args)
989: {
990:   DomainData         dd;
991:   PetscReal          norm, maxeig, mineig;
992:   PetscScalar        scalar_value;
993:   PetscInt           ndofs, its;
994:   Mat                A = NULL, F = NULL;
995:   KSP                KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
996:   KSPConvergedReason reason;
997:   Vec                exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
998:   PetscBool          testfetidp = PETSC_TRUE;

1000:   /* Init PETSc */
1001:   PetscFunctionBeginUser;
1002:   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
1003:   /* Initialize DomainData */
1004:   PetscCall(InitializeDomainData(&dd));
1005:   /* Decompose domain */
1006:   PetscCall(DomainDecomposition(&dd));
1007: #if DEBUG
1008:   printf("Subdomain data\n");
1009:   printf("IPS   : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.ipx, dd.ipy, dd.ipz);
1010:   printf("NEG   : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.nex, dd.ney, dd.nez);
1011:   printf("NEL   : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.nex_l, dd.ney_l, dd.nez_l);
1012:   printf("LDO   : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.xm_l, dd.ym_l, dd.zm_l);
1013:   printf("SIZES : %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.xm, dd.ym, dd.zm);
1014:   printf("STARTS: %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", dd.startx, dd.starty, dd.startz);
1015: #endif
1016:   dd.testkspfetidp = PETSC_TRUE;
1017:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1018:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1019:   /* assemble global matrix */
1020:   PetscCall(ComputeMatrix(dd, &A));
1021:   /* get work vectors */
1022:   PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1023:   PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1024:   PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1025:   /* create and customize KSP/PC for BDDC */
1026:   PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1027:   /* create KSP/PC for FETIDP */
1028:   if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1029:     /* create random exact solution */
1030: #if defined(PETSC_USE_COMPLEX)
1031:   PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1032: #else
1033:   PetscCall(VecSetRandom(exact_solution, NULL));
1034: #endif
1035:   PetscCall(VecShift(exact_solution, -0.5));
1036:   PetscCall(VecScale(exact_solution, 100.0));
1037:   PetscCall(VecGetSize(exact_solution, &ndofs));
1038:   if (dd.pure_neumann) {
1039:     PetscCall(VecSum(exact_solution, &scalar_value));
1040:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1041:     PetscCall(VecShift(exact_solution, scalar_value));
1042:   }
1043:   /* assemble BDDC rhs */
1044:   PetscCall(MatMult(A, exact_solution, bddc_rhs));
1045:   /* test ksp with BDDC */
1046:   PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1047:   PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1048:   PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1049:   PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1050:   if (dd.pure_neumann) {
1051:     PetscCall(VecSum(bddc_solution, &scalar_value));
1052:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1053:     PetscCall(VecShift(bddc_solution, scalar_value));
1054:   }
1055:   /* check exact_solution and BDDC solution */
1056:   PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1057:   PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1058:   PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1059:   PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1060:   if (reason < 0) {
1061:     PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1062:     PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1063:   }
1064:   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1065:   PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1066:   if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1067:   PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1068:   if (testfetidp) {
1069:     Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;

1071:     PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1072:     if (!dd.testkspfetidp) {
1073:       /* assemble fetidp rhs on the space of Lagrange multipliers */
1074:       PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1075:       PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1076:       PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1077:       PetscCall(VecSet(fetidp_solution, 0.0));
1078:       /* test ksp with FETIDP */
1079:       PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1080:       /* assemble fetidp solution on physical domain */
1081:       PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1082:     } else {
1083:       KSP kspF;
1084:       PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1085:       PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1086:       PetscCall(KSPGetOperators(kspF, &F, NULL));
1087:     }
1088:     PetscCall(MatGetSize(F, &ndofs, NULL));
1089:     PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1090:     PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1091:     PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1092:     /* check FETIDP sol */
1093:     if (dd.pure_neumann) {
1094:       PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1095:       scalar_value = -scalar_value / (PetscScalar)ndofs;
1096:       PetscCall(VecShift(fetidp_solution_all, scalar_value));
1097:     }
1098:     PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1099:     PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1100:     PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1101:     PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1102:     if (reason < 0) {
1103:       PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1104:       PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1105:     }
1106:     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1107:     PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1108:     if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1109:     PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1110:     PetscCall(VecDestroy(&fetidp_solution));
1111:     PetscCall(VecDestroy(&fetidp_solution_all));
1112:     PetscCall(VecDestroy(&fetidp_rhs));
1113:   }
1114:   PetscCall(KSPDestroy(&KSPwithFETIDP));
1115:   PetscCall(VecDestroy(&exact_solution));
1116:   PetscCall(VecDestroy(&bddc_solution));
1117:   PetscCall(VecDestroy(&bddc_rhs));
1118:   PetscCall(MatDestroy(&A));
1119:   PetscCall(KSPDestroy(&KSPwithBDDC));
1120:   /* Quit PETSc */
1121:   PetscCall(PetscFinalize());
1122:   return 0;
1123: }

1125: /*TEST

1127:  testset:
1128:    nsize: 4
1129:    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1130:    output_file: output/ex59_bddc_fetidp_1.out
1131:    test:
1132:      suffix: bddc_fetidp_1
1133:    test:
1134:      requires: viennacl
1135:      suffix: bddc_fetidp_1_viennacl
1136:      args: -subdomain_mat_type aijviennacl
1137:    test:
1138:      requires: cuda
1139:      suffix: bddc_fetidp_1_cuda
1140:      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

1142:  testset:
1143:    nsize: 4
1144:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1145:    requires: !single
1146:    test:
1147:      suffix: bddc_fetidp_2
1148:    test:
1149:      suffix: bddc_fetidp_3
1150:      args: -npz 1 -nez 1
1151:    test:
1152:      suffix: bddc_fetidp_4
1153:      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

1155:  testset:
1156:    nsize: 8
1157:    suffix: bddc_fetidp_approximate
1158:    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

1160:  testset:
1161:    nsize: 4
1162:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1163:    filter: grep -v "variant HERMITIAN"
1164:    requires: !single
1165:    test:
1166:      suffix: bddc_fetidp_ml_1
1167:      args: -physical_pc_bddc_coarsening_ratio 1
1168:    test:
1169:      suffix: bddc_fetidp_ml_2
1170:      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1171:    test:
1172:      suffix: bddc_fetidp_ml_3
1173:      args: -physical_pc_bddc_coarsening_ratio 4

1175:  testset:
1176:    nsize: 9
1177:    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1178:    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1179:    test:
1180:      suffix: bddc_fetidp_ml_eqlimit_1
1181:      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1182:    test:
1183:      suffix: bddc_fetidp_ml_eqlimit_2
1184:      args: -physical_pc_bddc_coarse_eqs_limit 46

1186: TEST*/