Actual source code: ex59.c

  1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
  2: Discrete system: 1D, 2D or 3D Laplacian, discretized with spectral elements.\n\
  3: The spectral degree can be specified with the -p option.\n\
  4: The global problem either has Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
  5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
  6: Example usage: \n\
  7: 1D: mpiexec -n 4 ex59 -nex 7\n\
  8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
  9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
 10: Subdomain decomposition can be specified with -np_ parameters.\n\
 11: Dirichlet boundaries are imposed on one side by default:\n\
 12: the solver does not iterate on Dirichlet nodes unless -usezerorows is passed in, in which case it also iterates on them.\n\
 13: Pure Neumann case can be requested by passing in -pureneumann.\n\
 14: In the latter case, to avoid runtime errors during factorization, also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
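/* Other runtime options parsed below (not listed in the help string above): -jump <f> imposes a
   checkerboard coefficient jump of 10^f across subdomains, while -testfetidp and -testkspfetidp
   select how the FETI-DP solver is built and tested. The options prefixes "subdomain_",
   "physical_" and "fluxes_" can be used to customize the local matrices, the BDDC solver and the
   FETI-DP solver, respectively. */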

 16: #include <petscksp.h>
 17: #include <petscpc.h>
 18: #include <petscdm.h>
 19: #include <petscdmda.h>
 20: #include <petscblaslapack.h>
 21: #define DEBUG 0

 23: /* structure holding domain data */
 24: typedef struct {
 25:   /* communicator */
 26:   MPI_Comm gcomm;
 27:   /* space dimension */
 28:   PetscInt dim;
 29:   /* spectral degree */
 30:   PetscInt p;
 31:   /* subdomains per dimension */
 32:   PetscInt npx, npy, npz;
 33:   /* subdomain index in cartesian dimensions */
 34:   PetscInt ipx, ipy, ipz;
 35:   /* elements per dimension */
 36:   PetscInt nex, ney, nez;
 37:   /* local elements per dimension */
 38:   PetscInt nex_l, ney_l, nez_l;
 39:   /* global number of dofs per dimension */
 40:   PetscInt xm, ym, zm;
 41:   /* local number of dofs per dimension */
 42:   PetscInt xm_l, ym_l, zm_l;
 43:   /* starting global indexes for subdomain in lexicographic ordering */
 44:   PetscInt startx, starty, startz;
 45:   /* pure Neumann problem */
 46:   PetscBool pure_neumann;
 47:   /* Dirichlet BC implementation */
 48:   PetscBool DBC_zerorows;
 49:   /* Scaling factor for subdomain */
 50:   PetscScalar scalingfactor;
 51:   PetscBool   testkspfetidp;
 52: } DomainData;

 54: /* structure holding GLL data */
 55: typedef struct {
 56:   /* GLL nodes */
 57:   PetscReal *zGL;
 58:   /* GLL weights */
 59:   PetscScalar *rhoGL;
 60:   /* aux_mat */
 61:   PetscScalar **A;
 62:   /* Element matrix */
 63:   Mat elem_mat;
 64: } GLLData;

 66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
 67: {
 68:   PetscInt *xadj_temp, *adjncy_temp;
 69:   PetscInt  i, j, k, ii, jj, kk, iindex, count_adj;
 70:   PetscInt  istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
 71:   PetscBool internal_node;
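  /* Build the local connectivity graph in CSR format, to be passed to PCBDDCSetLocalAdjacencyGraph().
     Two identical passes over the local dofs (in the subdomain lexicographic ordering): the first
     pass only counts the entries of adjncy, the second fills xadj and adjncy. Dofs strictly interior
     to the subdomain are given a single self-connection, while dofs on the local boundary layer are
     connected to their full box stencil of neighbours, which keeps the graph small while still
     describing the connectivity of the candidate interface dofs. */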

 73:   /* first count dimension of adjncy */
 74:   PetscFunctionBeginUser;
 75:   count_adj = 0;
 76:   for (k = 0; k < dd.zm_l; k++) {
 77:     internal_node = PETSC_TRUE;
 78:     kstart_csr    = k > 0 ? k - 1 : k;
 79:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
 80:     if (k == 0 || k == dd.zm_l - 1) {
 81:       internal_node = PETSC_FALSE;
 82:       kstart_csr    = k;
 83:       kend_csr      = k + 1;
 84:     }
 85:     for (j = 0; j < dd.ym_l; j++) {
 86:       jstart_csr = j > 0 ? j - 1 : j;
 87:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
 88:       if (j == 0 || j == dd.ym_l - 1) {
 89:         internal_node = PETSC_FALSE;
 90:         jstart_csr    = j;
 91:         jend_csr      = j + 1;
 92:       }
 93:       for (i = 0; i < dd.xm_l; i++) {
 94:         istart_csr = i > 0 ? i - 1 : i;
 95:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
 96:         if (i == 0 || i == dd.xm_l - 1) {
 97:           internal_node = PETSC_FALSE;
 98:           istart_csr    = i;
 99:           iend_csr      = i + 1;
100:         }
101:         if (internal_node) {
102:           istart_csr = i;
103:           iend_csr   = i + 1;
104:           jstart_csr = j;
105:           jend_csr   = j + 1;
106:           kstart_csr = k;
107:           kend_csr   = k + 1;
108:         }
109:         for (kk = kstart_csr; kk < kend_csr; kk++) {
110:           for (jj = jstart_csr; jj < jend_csr; jj++) {
111:             for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112:           }
113:         }
114:       }
115:     }
116:   }
117:   PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118:   PetscCall(PetscMalloc1(count_adj, &adjncy_temp));

120:   /* now fill CSR data structure */
121:   count_adj = 0;
122:   for (k = 0; k < dd.zm_l; k++) {
123:     internal_node = PETSC_TRUE;
124:     kstart_csr    = k > 0 ? k - 1 : k;
125:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
126:     if (k == 0 || k == dd.zm_l - 1) {
127:       internal_node = PETSC_FALSE;
128:       kstart_csr    = k;
129:       kend_csr      = k + 1;
130:     }
131:     for (j = 0; j < dd.ym_l; j++) {
132:       jstart_csr = j > 0 ? j - 1 : j;
133:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
134:       if (j == 0 || j == dd.ym_l - 1) {
135:         internal_node = PETSC_FALSE;
136:         jstart_csr    = j;
137:         jend_csr      = j + 1;
138:       }
139:       for (i = 0; i < dd.xm_l; i++) {
140:         istart_csr = i > 0 ? i - 1 : i;
141:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
142:         if (i == 0 || i == dd.xm_l - 1) {
143:           internal_node = PETSC_FALSE;
144:           istart_csr    = i;
145:           iend_csr      = i + 1;
146:         }
147:         iindex            = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148:         xadj_temp[iindex] = count_adj;
149:         if (internal_node) {
150:           istart_csr = i;
151:           iend_csr   = i + 1;
152:           jstart_csr = j;
153:           jend_csr   = j + 1;
154:           kstart_csr = k;
155:           kend_csr   = k + 1;
156:         }
157:         for (kk = kstart_csr; kk < kend_csr; kk++) {
158:           for (jj = jstart_csr; jj < jend_csr; jj++) {
159:             for (ii = istart_csr; ii < iend_csr; ii++) {
160:               iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;

162:               adjncy_temp[count_adj] = iindex;
163:               count_adj              = count_adj + 1;
164:             }
165:           }
166:         }
167:       }
168:     }
169:   }
170:   xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;

172:   *xadj   = xadj_temp;
173:   *adjncy = adjncy_temp;
174:   PetscFunctionReturn(PETSC_SUCCESS);
175: }

177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179:   IS         temp_dirichlet = 0, temp_neumann = 0;
180:   PetscInt   localsize, i, j, k, *indices;
181:   PetscBool *touched;

183:   PetscFunctionBeginUser;
184:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

186:   PetscCall(PetscMalloc1(localsize, &indices));
187:   PetscCall(PetscMalloc1(localsize, &touched));
188:   for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
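  /* Local (subdomain-wise) indices of dofs lying on the boundary of the global domain: the
     Dirichlet set contains only the west face, where boundary conditions are imposed when not in
     the pure Neumann case, while the Neumann set collects all the outer faces; the touched[] array
     guarantees that a dof shared by two faces (or already placed in the Dirichlet set) is listed
     only once. */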

190:   if (dirichlet) {
191:     i = 0;
192:     /* west boundary */
193:     if (dd.ipx == 0) {
194:       for (k = 0; k < dd.zm_l; k++) {
195:         for (j = 0; j < dd.ym_l; j++) {
196:           indices[i]          = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197:           touched[indices[i]] = PETSC_TRUE;
198:           i++;
199:         }
200:       }
201:     }
202:     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203:   }
204:   if (neumann) {
205:     i = 0;
206:     /* west boundary */
207:     if (dd.ipx == 0) {
208:       for (k = 0; k < dd.zm_l; k++) {
209:         for (j = 0; j < dd.ym_l; j++) {
210:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211:           if (!touched[indices[i]]) {
212:             touched[indices[i]] = PETSC_TRUE;
213:             i++;
214:           }
215:         }
216:       }
217:     }
218:     /* east boundary */
219:     if (dd.ipx == dd.npx - 1) {
220:       for (k = 0; k < dd.zm_l; k++) {
221:         for (j = 0; j < dd.ym_l; j++) {
222:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223:           if (!touched[indices[i]]) {
224:             touched[indices[i]] = PETSC_TRUE;
225:             i++;
226:           }
227:         }
228:       }
229:     }
230:     /* south boundary */
231:     if (dd.ipy == 0 && dd.dim > 1) {
232:       for (k = 0; k < dd.zm_l; k++) {
233:         for (j = 0; j < dd.xm_l; j++) {
234:           indices[i] = k * dd.ym_l * dd.xm_l + j;
235:           if (!touched[indices[i]]) {
236:             touched[indices[i]] = PETSC_TRUE;
237:             i++;
238:           }
239:         }
240:       }
241:     }
242:     /* north boundary */
243:     if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244:       for (k = 0; k < dd.zm_l; k++) {
245:         for (j = 0; j < dd.xm_l; j++) {
246:           indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247:           if (!touched[indices[i]]) {
248:             touched[indices[i]] = PETSC_TRUE;
249:             i++;
250:           }
251:         }
252:       }
253:     }
254:     /* bottom boundary */
255:     if (dd.ipz == 0 && dd.dim > 2) {
256:       for (k = 0; k < dd.ym_l; k++) {
257:         for (j = 0; j < dd.xm_l; j++) {
258:           indices[i] = k * dd.xm_l + j;
259:           if (!touched[indices[i]]) {
260:             touched[indices[i]] = PETSC_TRUE;
261:             i++;
262:           }
263:         }
264:       }
265:     }
266:     /* top boundary */
267:     if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268:       for (k = 0; k < dd.ym_l; k++) {
269:         for (j = 0; j < dd.xm_l; j++) {
270:           indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271:           if (!touched[indices[i]]) {
272:             touched[indices[i]] = PETSC_TRUE;
273:             i++;
274:           }
275:         }
276:       }
277:     }
278:     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279:   }
280:   if (dirichlet) *dirichlet = temp_dirichlet;
281:   if (neumann) *neumann = temp_neumann;
282:   PetscCall(PetscFree(indices));
283:   PetscCall(PetscFree(touched));
284:   PetscFunctionReturn(PETSC_SUCCESS);
285: }

287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289:   DM                     da;
290:   AO                     ao;
291:   DMBoundaryType         bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292:   DMDAStencilType        stype = DMDA_STENCIL_BOX;
293:   ISLocalToGlobalMapping temp_isg2lmap;
294:   PetscInt               i, j, k, ig, jg, kg, lindex, gindex, localsize;
295:   PetscInt              *global_indices;

297:   PetscFunctionBeginUser;
298:   /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299:      just to illustrate the creation of a MATIS object */
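  /* Each local dof first gets its natural (lexicographic) global index kg*xm*ym + jg*xm + ig;
     an auxiliary DMDA of the global size is then used to translate these indices from the natural
     ordering to the parallel PETSc ordering through its AO, and the translated indices define the
     local-to-global mapping used by the MATIS object. */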
300:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301:   PetscCall(PetscMalloc1(localsize, &global_indices));
302:   for (k = 0; k < dd.zm_l; k++) {
303:     kg = dd.startz + k;
304:     for (j = 0; j < dd.ym_l; j++) {
305:       jg = dd.starty + j;
306:       for (i = 0; i < dd.xm_l; i++) {
307:         ig                     = dd.startx + i;
308:         lindex                 = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309:         gindex                 = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310:         global_indices[lindex] = gindex;
311:       }
312:     }
313:   }
314:   if (dd.dim == 3) {
315:     PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316:   } else if (dd.dim == 2) {
317:     PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318:   } else {
319:     PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320:   }
321:   PetscCall(DMSetFromOptions(da));
322:   PetscCall(DMSetUp(da));
323:   PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324:   PetscCall(DMDAGetAO(da, &ao));
325:   PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
326:   PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327:   PetscCall(DMDestroy(&da));
328:   *isg2lmap = temp_isg2lmap;
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333:   PetscInt     localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334:   PetscInt     ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335:   PetscInt    *indexg, *cols, *colsg;
336:   PetscScalar *vals;
337:   Mat          temp_local_mat, elem_mat_DBC = 0, *usedmat;
338:   IS           submatIS;

340:   PetscFunctionBeginUser;
341:   PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342:   PetscCall(PetscMalloc1(i, &indexg));
343:   PetscCall(PetscMalloc1(i, &colsg));
344:   /* get submatrix of elem_mat without dirichlet nodes */
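  /* When Dirichlet dofs are eliminated from the system (neither pure Neumann nor -usezerorows),
     the subdomains touching the Dirichlet side (ipx == 0) drop the first GLL layer of their
     westernmost elements: the reduced element matrix elem_mat_DBC is extracted here, and the
     index shift ming used below keeps the other elements aligned with the reduced local size. */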
345:   if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346:     xloc = dd.p + 1;
347:     yloc = 1;
348:     zloc = 1;
349:     if (dd.dim > 1) yloc = dd.p + 1;
350:     if (dd.dim > 2) zloc = dd.p + 1;
351:     ii = 0;
352:     for (k = 0; k < zloc; k++) {
353:       for (j = 0; j < yloc; j++) {
354:         for (i = 1; i < xloc; i++) {
355:           indexg[ii] = k * xloc * yloc + j * xloc + i;
356:           ii++;
357:         }
358:       }
359:     }
360:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361:     PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362:     PetscCall(ISDestroy(&submatIS));
363:   }

365:   /* Assemble subdomain matrix */
366:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367:   PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368:   PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369:   PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
 370:   /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
371:   /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
372:   if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373:     PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374:   } else {
375:     PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376:   }
377:   PetscCall(MatSetFromOptions(temp_local_mat));

379:   i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);

381:   PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL));      /* very overestimated */
382:   PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383:   PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL));  /* very overestimated */
384:   PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));

386:   yloc = dd.p + 1;
387:   zloc = dd.p + 1;
388:   if (dd.dim < 3) zloc = 1;
389:   if (dd.dim < 2) yloc = 1;

391:   auxnez = dd.nez_l;
392:   auxney = dd.ney_l;
393:   auxnex = dd.nex_l;
394:   if (dd.dim < 3) auxnez = 1;
395:   if (dd.dim < 2) auxney = 1;

397:   for (ke = 0; ke < auxnez; ke++) {
398:     for (je = 0; je < auxney; je++) {
399:       for (ie = 0; ie < auxnex; ie++) {
400:         /* customize element accounting for BC */
401:         xloc    = dd.p + 1;
402:         ming    = 0;
403:         usedmat = &glldata.elem_mat;
404:         if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405:           if (ie == 0) {
406:             xloc    = dd.p;
407:             usedmat = &elem_mat_DBC;
408:           } else {
409:             ming    = -1;
410:             usedmat = &glldata.elem_mat;
411:           }
412:         }
413:         /* local to the element/global to the subdomain indexing */
414:         for (k = 0; k < zloc; k++) {
415:           kg = ke * dd.p + k;
416:           for (j = 0; j < yloc; j++) {
417:             jg = je * dd.p + j;
418:             for (i = 0; i < xloc; i++) {
419:               ig         = ie * dd.p + i + ming;
420:               ii         = k * xloc * yloc + j * xloc + i;
421:               indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422:             }
423:           }
424:         }
425:         /* Set values */
426:         for (i = 0; i < xloc * yloc * zloc; i++) {
427:           PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428:           for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429:           PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430:           PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431:         }
432:       }
433:     }
434:   }
435:   PetscCall(PetscFree(indexg));
436:   PetscCall(PetscFree(colsg));
437:   PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438:   PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439: #if DEBUG
440:   {
441:     Vec       lvec, rvec;
442:     PetscReal norm;
443:     PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444:     PetscCall(VecSet(lvec, 1.0));
445:     PetscCall(MatMult(temp_local_mat, lvec, rvec));
446:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447:     PetscCall(VecDestroy(&lvec));
448:     PetscCall(VecDestroy(&rvec));
449:   }
450: #endif
451:   *local_mat = temp_local_mat;
452:   PetscCall(MatDestroy(&elem_mat_DBC));
453:   PetscFunctionReturn(PETSC_SUCCESS);
454: }

456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458:   PetscReal   *M, si;
459:   PetscScalar  x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460:   PetscBLASInt pm1, lierr;
461:   PetscInt     i, j, n, k, s, r, q, ii, jj, p = dd.p;
462:   PetscInt     xloc, yloc, zloc, xyloc, xyzloc;

464:   PetscFunctionBeginUser;
465:   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
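  /* The endpoints are -1 and 1, while the p-1 interior GLL nodes are the roots of the derivative
     of the Legendre polynomial of degree p; below they are computed, Golub-Welsch style, as the
     eigenvalues of a symmetric tridiagonal matrix with zero diagonal and off-diagonal entries M[i],
     using the LAPACK routine steqr. */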
466:   PetscCall(PetscCalloc1(p + 1, &glldata->zGL));

468:   glldata->zGL[0] = -1.0;
469:   glldata->zGL[p] = 1.0;
470:   if (p > 1) {
471:     if (p == 2) glldata->zGL[1] = 0.0;
472:     else {
473:       PetscCall(PetscMalloc1(p - 1, &M));
474:       for (i = 0; i < p - 1; i++) {
475:         si   = (PetscReal)(i + 1.0);
476:         M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477:       }
478:       pm1 = (PetscBLASInt)(p - 1);
479:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480:       PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
 481:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR Lapack routine %d", (int)lierr);
482:       PetscCall(PetscFPTrapPop());
483:       PetscCall(PetscFree(M));
484:     }
485:   }

487:   /* Weights for 1D quadrature */
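  /* GLL quadrature weights: rho_0 = rho_p = 2/(p*(p+1)) at the endpoints and
     rho_i = 2/(p*(p+1)*Lp(x_i)^2) at the interior nodes, where Lp(x_i) is evaluated with the
     standard three-term Legendre recurrence (the same recurrence is reused below when building
     the auxiliary matrix A for the laplacian). */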
488:   PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));

490:   glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491:   glldata->rhoGL[p] = glldata->rhoGL[0];
 492:   z2                = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
493:   for (i = 1; i < p; i++) {
494:     x  = glldata->zGL[i];
495:     z0 = 1.0;
496:     z1 = x;
497:     for (n = 1; n < p; n++) {
498:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499:       z0 = z1;
500:       z1 = z2;
501:     }
502:     glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503:   }

505:   /* Auxiliary mat for laplacian */
506:   PetscCall(PetscMalloc1(p + 1, &glldata->A));
507:   PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508:   for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;

510:   for (j = 1; j < p; j++) {
511:     x  = glldata->zGL[j];
512:     z0 = 1.0;
513:     z1 = x;
514:     for (n = 1; n < p; n++) {
515:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516:       z0 = z1;
517:       z1 = z2;
518:     }
519:     Lpj = z2;
520:     for (r = 1; r < p; r++) {
521:       if (r == j) {
522:         glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523:       } else {
524:         x  = glldata->zGL[r];
525:         z0 = 1.0;
526:         z1 = x;
527:         for (n = 1; n < p; n++) {
528:           z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529:           z0 = z1;
530:           z1 = z2;
531:         }
532:         Lpr              = z2;
533:         glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534:       }
535:     }
536:   }
537:   for (j = 1; j < p + 1; j++) {
538:     x  = glldata->zGL[j];
539:     z0 = 1.0;
540:     z1 = x;
541:     for (n = 1; n < p; n++) {
542:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543:       z0 = z1;
544:       z1 = z2;
545:     }
546:     Lpj              = z2;
547:     glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548:     glldata->A[0][j] = glldata->A[j][0];
549:   }
550:   for (j = 0; j < p; j++) {
551:     x  = glldata->zGL[j];
552:     z0 = 1.0;
553:     z1 = x;
554:     for (n = 1; n < p; n++) {
555:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556:       z0 = z1;
557:       z1 = z2;
558:     }
559:     Lpj = z2;

561:     glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562:     glldata->A[j][p] = glldata->A[p][j];
563:   }
564:   glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565:   glldata->A[p][p] = glldata->A[0][0];

567:   /* compute element matrix */
568:   xloc = p + 1;
569:   yloc = p + 1;
570:   zloc = p + 1;
571:   if (dd.dim < 2) yloc = 1;
572:   if (dd.dim < 3) zloc = 1;
573:   xyloc  = xloc * yloc;
574:   xyzloc = xloc * yloc * zloc;

576:   PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577:   PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578:   PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579:   PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580:   PetscCall(MatZeroEntries(glldata->elem_mat));
581:   PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));

583:   for (k = 0; k < zloc; k++) {
584:     if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585:     else rhoGLk = 1.0;

587:     for (j = 0; j < yloc; j++) {
588:       if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589:       else rhoGLj = 1.0;

591:       for (i = 0; i < xloc; i++) {
592:         ii = k * xyloc + j * xloc + i;
593:         s  = k;
594:         r  = j;
595:         for (q = 0; q < xloc; q++) {
596:           jj = s * xyloc + r * xloc + q;
597:           PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598:         }
599:         if (dd.dim > 1) {
600:           s = k;
601:           q = i;
602:           for (r = 0; r < yloc; r++) {
603:             jj = s * xyloc + r * xloc + q;
604:             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605:           }
606:         }
607:         if (dd.dim > 2) {
608:           r = j;
609:           q = i;
610:           for (s = 0; s < zloc; s++) {
611:             jj = s * xyloc + r * xloc + q;
612:             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613:           }
614:         }
615:       }
616:     }
617:   }
618:   PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619:   PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620: #if DEBUG
621:   {
622:     Vec       lvec, rvec;
623:     PetscReal norm;
624:     PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625:     PetscCall(VecSet(lvec, 1.0));
626:     PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628:     PetscCall(VecDestroy(&lvec));
629:     PetscCall(VecDestroy(&rvec));
630:   }
631: #endif
632:   PetscFunctionReturn(PETSC_SUCCESS);
633: }

635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637:   PetscMPIInt rank;
638:   PetscInt    i, j, k;

640:   PetscFunctionBeginUser;
641:   /* Subdomain index in cartesian coordinates */
642:   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
643:   dd->ipx = rank % dd->npx;
644:   if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645:   else dd->ipz = 0;

647:   dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
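  /* ranks are mapped to subdomains with x running fastest, e.g. with -npx 2 -npy 2 -npz 1
     rank 3 is assigned (ipx, ipy, ipz) = (1, 1, 0) */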
648:   /* number of local elements */
649:   dd->nex_l = dd->nex / dd->npx;
650:   if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651:   if (dd->dim > 1) {
652:     dd->ney_l = dd->ney / dd->npy;
653:     if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654:   } else {
655:     dd->ney_l = 0;
656:   }
657:   if (dd->dim > 2) {
658:     dd->nez_l = dd->nez / dd->npz;
659:     if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660:   } else {
661:     dd->nez_l = 0;
662:   }
663:   /* local and global number of dofs */
664:   dd->xm_l = dd->nex_l * dd->p + 1;
665:   dd->xm   = dd->nex * dd->p + 1;
666:   dd->ym_l = dd->ney_l * dd->p + 1;
667:   dd->ym   = dd->ney * dd->p + 1;
668:   dd->zm_l = dd->nez_l * dd->p + 1;
669:   dd->zm   = dd->nez * dd->p + 1;
670:   if (!dd->pure_neumann) {
671:     if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672:     if (!dd->DBC_zerorows) dd->xm--;
673:   }

675:   /* starting global index for local dofs (simple lexicographic order) */
676:   dd->startx = 0;
677:   j          = dd->nex / dd->npx;
678:   for (i = 0; i < dd->ipx; i++) {
679:     k = j;
680:     if (i < dd->nex % dd->npx) k++;
681:     dd->startx = dd->startx + k * dd->p;
682:   }
683:   if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

685:   dd->starty = 0;
686:   if (dd->dim > 1) {
687:     j = dd->ney / dd->npy;
688:     for (i = 0; i < dd->ipy; i++) {
689:       k = j;
690:       if (i < dd->ney % dd->npy) k++;
691:       dd->starty = dd->starty + k * dd->p;
692:     }
693:   }
694:   dd->startz = 0;
695:   if (dd->dim > 2) {
696:     j = dd->nez / dd->npz;
697:     for (i = 0; i < dd->ipz; i++) {
698:       k = j;
699:       if (i < dd->nez % dd->npz) k++;
700:       dd->startz = dd->startz + k * dd->p;
701:     }
702:   }
703:   PetscFunctionReturn(PETSC_SUCCESS);
704: }

706: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
707: {
708:   GLLData                gll;
709:   Mat                    local_mat = 0, temp_A = 0;
710:   ISLocalToGlobalMapping matis_map   = 0;
711:   IS                     dirichletIS = 0;

713:   PetscFunctionBeginUser;
 714:   /* Compute data for the Gauss-Lobatto-Legendre quadrature rule */
715:   PetscCall(GLLStuffs(dd, &gll));
716:   /* Compute matrix of subdomain Neumann problem */
717:   PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
718:   /* Compute global mapping of local dofs */
719:   PetscCall(ComputeMapping(dd, &matis_map));
720:   /* Create MATIS object needed by BDDC */
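  /* A MATIS matrix is never assembled globally: each process stores its local (Neumann) subdomain
     matrix together with the local-to-global mapping computed above, and matrix-vector products are
     performed by scattering to local vectors, multiplying locally and adding the results back.
     This unassembled format is what PCBDDC works with. */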
721:   PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
722:   /* Set local subdomain matrices into MATIS object */
723:   PetscCall(MatScale(local_mat, dd.scalingfactor));
724:   PetscCall(MatISSetLocalMat(temp_A, local_mat));
725:   /* Call assembly functions */
726:   PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
727:   PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));

729:   if (dd.DBC_zerorows) {
730:     PetscInt dirsize;

732:     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
733:     PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
734:     PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
735:     PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
736:     PetscCall(ISDestroy(&dirichletIS));
737:   }

 739:   /* giving hints about the local and global matrices can be useful to BDDC */
740:   PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
741:   PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
742: #if DEBUG
743:   {
744:     Vec       lvec, rvec;
745:     PetscReal norm;
746:     PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
747:     PetscCall(VecSet(lvec, 1.0));
748:     PetscCall(MatMult(temp_A, lvec, rvec));
749:     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
750:     PetscCall(VecDestroy(&lvec));
751:     PetscCall(VecDestroy(&rvec));
752:   }
753: #endif
754:   /* free allocated workspace */
755:   PetscCall(PetscFree(gll.zGL));
756:   PetscCall(PetscFree(gll.rhoGL));
757:   PetscCall(PetscFree(gll.A[0]));
758:   PetscCall(PetscFree(gll.A));
759:   PetscCall(MatDestroy(&gll.elem_mat));
760:   PetscCall(MatDestroy(&local_mat));
761:   PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
 762:   /* give back the pointer to the MATIS object */
763:   *A = temp_A;
764:   PetscFunctionReturn(PETSC_SUCCESS);
765: }

767: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
768: {
769:   KSP temp_ksp;
770:   PC  pc;

772:   PetscFunctionBeginUser;
773:   PetscCall(KSPGetPC(ksp_bddc, &pc));
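  /* Two ways of building the FETI-DP solver: either explicitly create the FETI-DP operator F and
     the Dirichlet preconditioner D from the BDDC preconditioner (PCBDDCCreateFETIDPOperators) and
     run CG on the space of Lagrange multipliers, or (with -testkspfetidp) use the KSPFETIDP type,
     which wraps the inner BDDC preconditioner and works directly with the original operators. */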
774:   if (!dd.testkspfetidp) {
775:     PC  D;
776:     Mat F;

778:     PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
779:     PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
780:     PetscCall(KSPSetOperators(temp_ksp, F, F));
781:     PetscCall(KSPSetType(temp_ksp, KSPCG));
782:     PetscCall(KSPSetPC(temp_ksp, D));
783:     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
784:     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
785:     PetscCall(KSPSetFromOptions(temp_ksp));
786:     PetscCall(KSPSetUp(temp_ksp));
787:     PetscCall(MatDestroy(&F));
788:     PetscCall(PCDestroy(&D));
789:   } else {
790:     Mat A, Ap;

792:     PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
793:     PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
794:     PetscCall(KSPSetOperators(temp_ksp, A, Ap));
795:     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
796:     PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
797:     PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
798:     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
799:     PetscCall(KSPSetFromOptions(temp_ksp));
800:     PetscCall(KSPSetUp(temp_ksp));
801:   }
802:   *ksp_fetidp = temp_ksp;
803:   PetscFunctionReturn(PETSC_SUCCESS);
804: }

806: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
807: {
808:   KSP          temp_ksp;
809:   PC           pc;
810:   IS           primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
811:   PetscInt     vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
812:   MatNullSpace near_null_space;

814:   PetscFunctionBeginUser;
815:   PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
816:   PetscCall(KSPSetOperators(temp_ksp, A, A));
817:   PetscCall(KSPSetType(temp_ksp, KSPCG));
818:   PetscCall(KSPGetPC(temp_ksp, &pc));
819:   PetscCall(PCSetType(pc, PCBDDC));

821:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

823:   /* BDDC customization */

825:   /* jumping coefficients case */
826:   PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));

828:   /* Dofs splitting
829:      Simple stride-1 IS
830:      It is not needed since, by default, PCBDDC assumes a stride-1 split */
831:   PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
832: #if 1
833:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
834:   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
835: #else
836:   /* examples for global ordering */

838:   /* each process lists the nodes it owns */
839:   PetscInt sr, er;
840:   PetscCall(MatGetOwnershipRange(A, &sr, &er));
841:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
842:   PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
843:   /* Split can be passed in a more general way since any process can list any node */
844: #endif
845:   PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
846:   PetscCall(PetscFree(bddc_dofs_splitting));

 848:   /* Primal constraints implemented by using a near null space attached to A -> here only the constants are passed in
 849:     (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
850: #if 0
851:   Vec vecs[2];
852:   PetscRandom rctx;
853:   PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
854:   PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
855:   PetscCall(VecSetRandom(vecs[0],rctx));
856:   PetscCall(VecSetRandom(vecs[1],rctx));
857:   PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
858:   PetscCall(VecDestroy(&vecs[0]));
859:   PetscCall(VecDestroy(&vecs[1]));
860:   PetscCall(PetscRandomDestroy(&rctx));
861: #else
862:   PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
863: #endif
864:   PetscCall(MatSetNearNullSpace(A, near_null_space));
865:   PetscCall(MatNullSpaceDestroy(&near_null_space));

867:   /* CSR graph of subdomain dofs */
868:   PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
869:   PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));

871:   /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
872:   vidx[0] = 0 * dd.xm_l + 0;
873:   vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
874:   vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
875:   vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
876:   vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
877:   vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
878:   vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
879:   vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
880:   PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
881:   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
882:   PetscCall(ISDestroy(&primals));

884:   /* Neumann/Dirichlet indices on the global boundary */
885:   if (dd.DBC_zerorows) {
 886:     /* Only if you eliminate some rows of the matrix with the zerorows function do you need to set Dirichlet indices into the PCBDDC data */
887:     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
888:     PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
889:     PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
890:   } else {
891:     if (dd.pure_neumann) {
 892:       /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
893:       PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
894:       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
895:     } else {
 896:       /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
 897:       /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
898:       PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
899:       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
900:     }
901:   }

903:   /* Pass local null space information to local matrices (needed when using approximate local solvers) */
904:   if (dd.ipx || dd.pure_neumann) {
905:     MatNullSpace nsp;
906:     Mat          local_mat;

908:     PetscCall(MatISGetLocalMat(A, &local_mat));
909:     PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
910:     PetscCall(MatSetNullSpace(local_mat, nsp));
911:     PetscCall(MatNullSpaceDestroy(&nsp));
912:   }
913:   PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
914:   PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
915:   PetscCall(KSPSetFromOptions(temp_ksp));
916:   PetscCall(KSPSetUp(temp_ksp));
917:   *ksp = temp_ksp;
918:   PetscCall(ISDestroy(&dirichletIS));
919:   PetscCall(ISDestroy(&neumannIS));
920:   PetscFunctionReturn(PETSC_SUCCESS);
921: }

923: static PetscErrorCode InitializeDomainData(DomainData *dd)
924: {
925:   PetscMPIInt sizes, rank;
926:   PetscInt    factor;

928:   PetscFunctionBeginUser;
929:   dd->gcomm = PETSC_COMM_WORLD;
930:   PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
931:   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
932:   /* Get information from command line */
933:   /* Processors/subdomains per dimension */
934:   /* Default is 1d problem */
935:   dd->npx = sizes;
936:   dd->npy = 0;
937:   dd->npz = 0;
938:   dd->dim = 1;
939:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
940:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
941:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
942:   if (dd->npy) dd->dim++;
943:   if (dd->npz) dd->dim++;
944:   /* Number of elements per dimension */
945:   /* Default is one element per subdomain */
946:   dd->nex = dd->npx;
947:   dd->ney = dd->npy;
948:   dd->nez = dd->npz;
949:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
950:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
951:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
952:   if (!dd->npy) {
953:     dd->ney = 0;
954:     dd->nez = 0;
955:   }
956:   if (!dd->npz) dd->nez = 0;
957:   /* Spectral degree */
958:   dd->p = 3;
959:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
960:   /* pure neumann problem? */
961:   dd->pure_neumann = PETSC_FALSE;
962:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));

964:   /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
965:   dd->DBC_zerorows = PETSC_FALSE;

967:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
968:   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
969:   dd->scalingfactor = 1.0;

971:   factor = 0;
972:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
973:   /* checkerboard pattern */
974:   dd->scalingfactor = PetscPowScalar(10.0, factor * PetscPowInt(-1, rank));
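  /* with -jump f, even ranks scale their subdomain matrix by 10^f and odd ranks by 10^-f
     (f defaults to 0, i.e. no jump), producing a checkerboard pattern of coefficient jumps
     across the subdomain interfaces */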
 975:   /* check the data passed in */
976:   if (dd->dim == 1) {
977:     PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
978:     PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
979:   } else if (dd->dim == 2) {
980:     PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
 981:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
982:   } else {
983:     PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
984:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
985:   }
986:   PetscFunctionReturn(PETSC_SUCCESS);
987: }

989: int main(int argc, char **args)
990: {
991:   DomainData         dd;
992:   PetscReal          norm, maxeig, mineig;
993:   PetscScalar        scalar_value;
994:   PetscInt           ndofs, its;
995:   Mat                A = NULL, F = NULL;
996:   KSP                KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
997:   KSPConvergedReason reason;
998:   Vec                exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
999:   PetscBool          testfetidp = PETSC_TRUE;

1001:   /* Init PETSc */
1002:   PetscFunctionBeginUser;
1003:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
1004:   /* Initialize DomainData */
1005:   PetscCall(InitializeDomainData(&dd));
1006:   /* Decompose domain */
1007:   PetscCall(DomainDecomposition(&dd));
1008: #if DEBUG
1009:   printf("Subdomain data\n");
1010:   printf("IPS   : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1011:   printf("NEG   : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1012:   printf("NEL   : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1013:   printf("LDO   : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1014:   printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1015:   printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1016: #endif
1017:   dd.testkspfetidp = PETSC_TRUE;
1018:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1019:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1020:   /* assemble global matrix */
1021:   PetscCall(ComputeMatrix(dd, &A));
1022:   /* get work vectors */
1023:   PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1024:   PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1025:   PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1026:   /* create and customize KSP/PC for BDDC */
1027:   PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1028:   /* create KSP/PC for FETIDP */
1029:   if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1030:   /* create exact solution (random for real scalars, constant for complex) */
1031: #if defined(PETSC_USE_COMPLEX)
1032:   PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1033: #else
1034:   PetscCall(VecSetRandom(exact_solution, NULL));
1035: #endif
1036:   PetscCall(VecShift(exact_solution, -0.5));
1037:   PetscCall(VecScale(exact_solution, 100.0));
1038:   PetscCall(VecGetSize(exact_solution, &ndofs));
1039:   if (dd.pure_neumann) {
1040:     PetscCall(VecSum(exact_solution, &scalar_value));
1041:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1042:     PetscCall(VecShift(exact_solution, scalar_value));
1043:   }
1044:   /* assemble BDDC rhs */
1045:   PetscCall(MatMult(A, exact_solution, bddc_rhs));
1046:   /* test ksp with BDDC */
1047:   PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1048:   PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1049:   PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1050:   PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1051:   if (dd.pure_neumann) {
1052:     PetscCall(VecSum(bddc_solution, &scalar_value));
1053:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1054:     PetscCall(VecShift(bddc_solution, scalar_value));
1055:   }
1056:   /* check exact_solution and BDDC solution */
1057:   PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1058:   PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1059:   PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1060:   PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1061:   if (reason < 0) {
1062:     PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1063:     PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1064:   }
1065:   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1066:   PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1067:   if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1068:   PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1069:   if (testfetidp) {
1070:     Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;

1072:     PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1073:     if (!dd.testkspfetidp) {
1074:       /* assemble fetidp rhs on the space of Lagrange multipliers */
1075:       PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1076:       PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1077:       PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1078:       PetscCall(VecSet(fetidp_solution, 0.0));
1079:       /* test ksp with FETIDP */
1080:       PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1081:       /* assemble fetidp solution on physical domain */
1082:       PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1083:     } else {
1084:       KSP kspF;
1085:       PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1086:       PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1087:       PetscCall(KSPGetOperators(kspF, &F, NULL));
1088:     }
1089:     PetscCall(MatGetSize(F, &ndofs, NULL));
1090:     PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1091:     PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1092:     PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1093:     /* check FETIDP sol */
1094:     if (dd.pure_neumann) {
1095:       PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1096:       scalar_value = -scalar_value / (PetscScalar)ndofs;
1097:       PetscCall(VecShift(fetidp_solution_all, scalar_value));
1098:     }
1099:     PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1100:     PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1101:     PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1102:     PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1103:     if (reason < 0) {
1104:       PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1105:       PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1106:     }
1107:     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1108:     PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1109:     if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1110:     PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1111:     PetscCall(VecDestroy(&fetidp_solution));
1112:     PetscCall(VecDestroy(&fetidp_solution_all));
1113:     PetscCall(VecDestroy(&fetidp_rhs));
1114:   }
1115:   PetscCall(KSPDestroy(&KSPwithFETIDP));
1116:   PetscCall(VecDestroy(&exact_solution));
1117:   PetscCall(VecDestroy(&bddc_solution));
1118:   PetscCall(VecDestroy(&bddc_rhs));
1119:   PetscCall(MatDestroy(&A));
1120:   PetscCall(KSPDestroy(&KSPwithBDDC));
1121:   /* Quit PETSc */
1122:   PetscCall(PetscFinalize());
1123:   return 0;
1124: }

1126: /*TEST

1128:  testset:
1129:    nsize: 4
1130:    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1131:    output_file: output/ex59_bddc_fetidp_1.out
1132:    test:
1133:      suffix: bddc_fetidp_1
1134:    test:
1135:      requires: viennacl
1136:      suffix: bddc_fetidp_1_viennacl
1137:      args: -subdomain_mat_type aijviennacl
1138:    test:
1139:      requires: cuda
1140:      suffix: bddc_fetidp_1_cuda
1141:      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

1143:  testset:
1144:    nsize: 4
1145:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1146:    requires: !single
1147:    test:
1148:      suffix: bddc_fetidp_2
1149:    test:
1150:      suffix: bddc_fetidp_3
1151:      args: -npz 1 -nez 1
1152:    test:
1153:      suffix: bddc_fetidp_4
1154:      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

1156:  testset:
1157:    nsize: 8
1158:    suffix: bddc_fetidp_approximate
1159:    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

1161:  testset:
1162:    nsize: 4
1163:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1164:    filter: grep -v "variant HERMITIAN"
1165:    requires: !single
1166:    test:
1167:      suffix: bddc_fetidp_ml_1
1168:      args: -physical_pc_bddc_coarsening_ratio 1
1169:    test:
1170:      suffix: bddc_fetidp_ml_2
1171:      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1172:    test:
1173:      suffix: bddc_fetidp_ml_3
1174:      args: -physical_pc_bddc_coarsening_ratio 4

1176:  testset:
1177:    nsize: 9
1178:    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1179:    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1180:    test:
1181:      suffix: bddc_fetidp_ml_eqlimit_1
1182:      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1183:    test:
1184:      suffix: bddc_fetidp_ml_eqlimit_2
1185:      args: -physical_pc_bddc_coarse_eqs_limit 46

1187: TEST*/