Actual source code: ex59.c

  1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
  2: Discrete system: 1D, 2D or 3D Laplacian, discretized with spectral elements.\n\
  3: The spectral degree can be specified with the -p option.\n\
  4: The global problem has either Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
  5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
  6: Example usage: \n\
  7: 1D: mpiexec -n 4 ex59 -nex 7\n\
  8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
  9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
 10: The subdomain decomposition can be specified with the -np_ parameters.\n\
 11: Dirichlet boundaries are imposed on one side by default:\n\
 12: by default the solver does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
 13: The pure Neumann case can be requested by passing in -pureneumann.\n\
 14: In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
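/* For example (an illustrative command line, not part of the original help text):
     mpiexec -n 4 ./ex59 -nex 4 -pureneumann -coarse_redundant_pc_factor_zeropivot 0 */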

 16: #include <petscksp.h>
 17: #include <petscpc.h>
 18: #include <petscdm.h>
 19: #include <petscdmda.h>
 20: #include <petscblaslapack.h>
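/* Set DEBUG to 1 to print the subdomain layout in main() and to exercise matrix-vector products with constant vectors in the assembly routines */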
 21: #define DEBUG 0

 23: /* structure holding domain data */
 24: typedef struct {
 25:   /* communicator */
 26:   MPI_Comm gcomm;
 27:   /* space dimension */
 28:   PetscInt dim;
 29:   /* spectral degree */
 30:   PetscInt p;
 31:   /* subdomains per dimension */
 32:   PetscInt npx, npy, npz;
 34:   /* subdomain index in Cartesian coordinates */
 34:   PetscInt ipx, ipy, ipz;
 35:   /* elements per dimension */
 36:   PetscInt nex, ney, nez;
 37:   /* local elements per dimension */
 38:   PetscInt nex_l, ney_l, nez_l;
 39:   /* global number of dofs per dimension */
 40:   PetscInt xm, ym, zm;
 41:   /* local number of dofs per dimension */
 42:   PetscInt xm_l, ym_l, zm_l;
 44:   /* starting global indices for the subdomain in lexicographic ordering */
 44:   PetscInt startx, starty, startz;
 45:   /* pure Neumann problem */
 46:   PetscBool pure_neumann;
 47:   /* Dirichlet BC implementation */
 48:   PetscBool DBC_zerorows;
 49:   /* Scaling factor for subdomain */
 50:   PetscScalar scalingfactor;
 51:   PetscBool   testkspfetidp;
 52: } DomainData;

 54: /* structure holding GLL data */
 55: typedef struct {
 56:   /* GLL nodes */
 57:   PetscReal *zGL;
 58:   /* GLL weights */
 59:   PetscScalar *rhoGL;
 61:   /* auxiliary matrix with the 1D GLL stiffness entries */
 61:   PetscScalar **A;
 62:   /* Element matrix */
 63:   Mat elem_mat;
 64: } GLLData;

 66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
 67: {
 68:   PetscInt *xadj_temp, *adjncy_temp;
 69:   PetscInt  i, j, k, ii, jj, kk, iindex, count_adj;
 70:   PetscInt  istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
 71:   PetscBool internal_node;
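  /* Interior nodes of the local grid are connected only to themselves, while nodes on the
     subdomain boundary are connected to their full box-stencil neighborhood: this is enough
     for PCBDDC to analyze the connectivity of the interface from the local adjacency graph */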

 73:   /* first pass: count the size of adjncy */
 75:   count_adj = 0;
 76:   for (k = 0; k < dd.zm_l; k++) {
 77:     internal_node = PETSC_TRUE;
 78:     kstart_csr    = k > 0 ? k - 1 : k;
 79:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
 80:     if (k == 0 || k == dd.zm_l - 1) {
 81:       internal_node = PETSC_FALSE;
 82:       kstart_csr    = k;
 83:       kend_csr      = k + 1;
 84:     }
 85:     for (j = 0; j < dd.ym_l; j++) {
 86:       jstart_csr = j > 0 ? j - 1 : j;
 87:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
 88:       if (j == 0 || j == dd.ym_l - 1) {
 89:         internal_node = PETSC_FALSE;
 90:         jstart_csr    = j;
 91:         jend_csr      = j + 1;
 92:       }
 93:       for (i = 0; i < dd.xm_l; i++) {
 94:         istart_csr = i > 0 ? i - 1 : i;
 95:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
 96:         if (i == 0 || i == dd.xm_l - 1) {
 97:           internal_node = PETSC_FALSE;
 98:           istart_csr    = i;
 99:           iend_csr      = i + 1;
100:         }
101:         if (internal_node) {
102:           istart_csr = i;
103:           iend_csr   = i + 1;
104:           jstart_csr = j;
105:           jend_csr   = j + 1;
106:           kstart_csr = k;
107:           kend_csr   = k + 1;
108:         }
109:         for (kk = kstart_csr; kk < kend_csr; kk++) {
110:           for (jj = jstart_csr; jj < jend_csr; jj++) {
111:             for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112:           }
113:         }
114:       }
115:     }
116:   }
117:   PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp);
118:   PetscMalloc1(count_adj, &adjncy_temp);

120:   /* second pass: fill the CSR data structure */
121:   count_adj = 0;
122:   for (k = 0; k < dd.zm_l; k++) {
123:     internal_node = PETSC_TRUE;
124:     kstart_csr    = k > 0 ? k - 1 : k;
125:     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
126:     if (k == 0 || k == dd.zm_l - 1) {
127:       internal_node = PETSC_FALSE;
128:       kstart_csr    = k;
129:       kend_csr      = k + 1;
130:     }
131:     for (j = 0; j < dd.ym_l; j++) {
132:       jstart_csr = j > 0 ? j - 1 : j;
133:       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
134:       if (j == 0 || j == dd.ym_l - 1) {
135:         internal_node = PETSC_FALSE;
136:         jstart_csr    = j;
137:         jend_csr      = j + 1;
138:       }
139:       for (i = 0; i < dd.xm_l; i++) {
140:         istart_csr = i > 0 ? i - 1 : i;
141:         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
142:         if (i == 0 || i == dd.xm_l - 1) {
143:           internal_node = PETSC_FALSE;
144:           istart_csr    = i;
145:           iend_csr      = i + 1;
146:         }
147:         iindex            = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148:         xadj_temp[iindex] = count_adj;
149:         if (internal_node) {
150:           istart_csr = i;
151:           iend_csr   = i + 1;
152:           jstart_csr = j;
153:           jend_csr   = j + 1;
154:           kstart_csr = k;
155:           kend_csr   = k + 1;
156:         }
157:         for (kk = kstart_csr; kk < kend_csr; kk++) {
158:           for (jj = jstart_csr; jj < jend_csr; jj++) {
159:             for (ii = istart_csr; ii < iend_csr; ii++) {
160:               iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;

162:               adjncy_temp[count_adj] = iindex;
163:               count_adj              = count_adj + 1;
164:             }
165:           }
166:         }
167:       }
168:     }
169:   }
170:   xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;

172:   *xadj   = xadj_temp;
173:   *adjncy = adjncy_temp;
174:   return 0;
175: }

177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179:   IS         temp_dirichlet = 0, temp_neumann = 0;
180:   PetscInt   localsize, i, j, k, *indices;
181:   PetscBool *touched;

184:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

186:   PetscMalloc1(localsize, &indices);
187:   PetscMalloc1(localsize, &touched);
188:   for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
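  /* All indices below are in the local (subdomain) numbering: the Dirichlet set collects the
     portion of the west face of the global domain owned by this subdomain, while the Neumann
     set collects every global-boundary face touched by it; touched[] prevents duplicates */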

190:   if (dirichlet) {
191:     i = 0;
192:     /* west boundary */
193:     if (dd.ipx == 0) {
194:       for (k = 0; k < dd.zm_l; k++) {
195:         for (j = 0; j < dd.ym_l; j++) {
196:           indices[i]          = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197:           touched[indices[i]] = PETSC_TRUE;
198:           i++;
199:         }
200:       }
201:     }
202:     ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet);
203:   }
204:   if (neumann) {
205:     i = 0;
206:     /* west boundary */
207:     if (dd.ipx == 0) {
208:       for (k = 0; k < dd.zm_l; k++) {
209:         for (j = 0; j < dd.ym_l; j++) {
210:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211:           if (!touched[indices[i]]) {
212:             touched[indices[i]] = PETSC_TRUE;
213:             i++;
214:           }
215:         }
216:       }
217:     }
218:     /* east boundary */
219:     if (dd.ipx == dd.npx - 1) {
220:       for (k = 0; k < dd.zm_l; k++) {
221:         for (j = 0; j < dd.ym_l; j++) {
222:           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223:           if (!touched[indices[i]]) {
224:             touched[indices[i]] = PETSC_TRUE;
225:             i++;
226:           }
227:         }
228:       }
229:     }
230:     /* south boundary */
231:     if (dd.ipy == 0 && dd.dim > 1) {
232:       for (k = 0; k < dd.zm_l; k++) {
233:         for (j = 0; j < dd.xm_l; j++) {
234:           indices[i] = k * dd.ym_l * dd.xm_l + j;
235:           if (!touched[indices[i]]) {
236:             touched[indices[i]] = PETSC_TRUE;
237:             i++;
238:           }
239:         }
240:       }
241:     }
242:     /* north boundary */
243:     if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244:       for (k = 0; k < dd.zm_l; k++) {
245:         for (j = 0; j < dd.xm_l; j++) {
246:           indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247:           if (!touched[indices[i]]) {
248:             touched[indices[i]] = PETSC_TRUE;
249:             i++;
250:           }
251:         }
252:       }
253:     }
254:     /* bottom boundary */
255:     if (dd.ipz == 0 && dd.dim > 2) {
256:       for (k = 0; k < dd.ym_l; k++) {
257:         for (j = 0; j < dd.xm_l; j++) {
258:           indices[i] = k * dd.xm_l + j;
259:           if (!touched[indices[i]]) {
260:             touched[indices[i]] = PETSC_TRUE;
261:             i++;
262:           }
263:         }
264:       }
265:     }
266:     /* top boundary */
267:     if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268:       for (k = 0; k < dd.ym_l; k++) {
269:         for (j = 0; j < dd.xm_l; j++) {
270:           indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271:           if (!touched[indices[i]]) {
272:             touched[indices[i]] = PETSC_TRUE;
273:             i++;
274:           }
275:         }
276:       }
277:     }
278:     ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann);
279:   }
280:   if (dirichlet) *dirichlet = temp_dirichlet;
281:   if (neumann) *neumann = temp_neumann;
282:   PetscFree(indices);
283:   PetscFree(touched);
284:   return 0;
285: }

287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289:   DM                     da;
290:   AO                     ao;
291:   DMBoundaryType         bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292:   DMDAStencilType        stype = DMDA_STENCIL_BOX;
293:   ISLocalToGlobalMapping temp_isg2lmap;
294:   PetscInt               i, j, k, ig, jg, kg, lindex, gindex, localsize;
295:   PetscInt              *global_indices;

298:   /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299:      just to illustrate the creation of a MATIS object */
300:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301:   PetscMalloc1(localsize, &global_indices);
302:   for (k = 0; k < dd.zm_l; k++) {
303:     kg = dd.startz + k;
304:     for (j = 0; j < dd.ym_l; j++) {
305:       jg = dd.starty + j;
306:       for (i = 0; i < dd.xm_l; i++) {
307:         ig                     = dd.startx + i;
308:         lindex                 = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309:         gindex                 = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310:         global_indices[lindex] = gindex;
311:       }
312:     }
313:   }
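  /* global_indices now holds the natural (lexicographic) global numbering of the local dofs;
     the application ordering (AO) of a matching DMDA, created below, converts it to the PETSc
     ordering, which then defines the local-to-global map of the MATIS matrix */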
314:   if (dd.dim == 3) {
315:     DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da);
316:   } else if (dd.dim == 2) {
317:     DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da);
318:   } else {
319:     DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da);
320:   }
321:   DMSetFromOptions(da);
322:   DMSetUp(da);
323:   DMDASetAOType(da, AOMEMORYSCALABLE);
324:   DMDAGetAO(da, &ao);
325:   AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices);
326:   ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap);
327:   DMDestroy(&da);
328:   *isg2lmap = temp_isg2lmap;
329:   return 0;
330: }
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333:   PetscInt     localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334:   PetscInt     ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335:   PetscInt    *indexg, *cols, *colsg;
336:   PetscScalar *vals;
337:   Mat          temp_local_mat, elem_mat_DBC = 0, *usedmat;
338:   IS           submatIS;

341:   MatGetSize(glldata.elem_mat, &i, &j);
342:   PetscMalloc1(i, &indexg);
343:   PetscMalloc1(i, &colsg);
344:   /* get submatrix of elem_mat without Dirichlet nodes */
345:   if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346:     xloc = dd.p + 1;
347:     yloc = 1;
348:     zloc = 1;
349:     if (dd.dim > 1) yloc = dd.p + 1;
350:     if (dd.dim > 2) zloc = dd.p + 1;
351:     ii = 0;
352:     for (k = 0; k < zloc; k++) {
353:       for (j = 0; j < yloc; j++) {
354:         for (i = 1; i < xloc; i++) {
355:           indexg[ii] = k * xloc * yloc + j * xloc + i;
356:           ii++;
357:         }
358:       }
359:     }
360:     ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS);
361:     MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC);
362:     ISDestroy(&submatIS);
363:   }

365:   /* Assemble subdomain matrix */
366:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367:   MatCreate(PETSC_COMM_SELF, &temp_local_mat);
368:   MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize);
369:   MatSetOptionsPrefix(temp_local_mat, "subdomain_");
370:   /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
371:   /* in order to avoid conversions inside the BDDC code, use SEQAIJ if possible */
372:   if (dd.DBC_zerorows && !dd.ipx) { /* in this case we need to zero out some of the rows, so use SEQAIJ */
373:     MatSetType(temp_local_mat, MATSEQAIJ);
374:   } else {
375:     MatSetType(temp_local_mat, MATSEQSBAIJ);
376:   }
377:   MatSetFromOptions(temp_local_mat);

379:   i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);

381:   MatSeqAIJSetPreallocation(temp_local_mat, i, NULL);      /* very overestimated */
382:   MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL); /* very overestimated */
383:   MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL);  /* very overestimated */
384:   MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);

386:   yloc = dd.p + 1;
387:   zloc = dd.p + 1;
388:   if (dd.dim < 3) zloc = 1;
389:   if (dd.dim < 2) yloc = 1;

391:   auxnez = dd.nez_l;
392:   auxney = dd.ney_l;
393:   auxnex = dd.nex_l;
394:   if (dd.dim < 3) auxnez = 1;
395:   if (dd.dim < 2) auxney = 1;
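  /* Loop over the local spectral elements: element (ie,je,ke) contributes to subdomain dofs
     ie*p .. ie*p+p in x (and similarly in y and z), so neighboring elements share GLL nodes
     and their element matrices are summed there via ADD_VALUES */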

397:   for (ke = 0; ke < auxnez; ke++) {
398:     for (je = 0; je < auxney; je++) {
399:       for (ie = 0; ie < auxnex; ie++) {
400:         /* customize element accounting for BC */
401:         xloc    = dd.p + 1;
402:         ming    = 0;
403:         usedmat = &glldata.elem_mat;
404:         if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405:           if (ie == 0) {
406:             xloc    = dd.p;
407:             usedmat = &elem_mat_DBC;
408:           } else {
409:             ming    = -1;
410:             usedmat = &glldata.elem_mat;
411:           }
412:         }
413:         /* map indices local to the element to indices global to the subdomain */
414:         for (k = 0; k < zloc; k++) {
415:           kg = ke * dd.p + k;
416:           for (j = 0; j < yloc; j++) {
417:             jg = je * dd.p + j;
418:             for (i = 0; i < xloc; i++) {
419:               ig         = ie * dd.p + i + ming;
420:               ii         = k * xloc * yloc + j * xloc + i;
421:               indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422:             }
423:           }
424:         }
425:         /* Set values */
426:         for (i = 0; i < xloc * yloc * zloc; i++) {
427:           MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals);
428:           for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429:           MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES);
430:           MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals);
431:         }
432:       }
433:     }
434:   }
435:   PetscFree(indexg);
436:   PetscFree(colsg);
437:   MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY);
438:   MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY);
439: #if DEBUG
440:   {
441:     Vec       lvec, rvec;
442:     PetscReal norm;
443:     MatCreateVecs(temp_local_mat, &lvec, &rvec);
444:     VecSet(lvec, 1.0);
445:     MatMult(temp_local_mat, lvec, rvec);
446:     VecNorm(rvec, NORM_INFINITY, &norm);
447:     VecDestroy(&lvec);
448:     VecDestroy(&rvec);
449:   }
450: #endif
451:   *local_mat = temp_local_mat;
452:   MatDestroy(&elem_mat_DBC);
453:   return 0;
454: }

456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458:   PetscReal   *M, si;
459:   PetscScalar  x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460:   PetscBLASInt pm1, lierr;
461:   PetscInt     i, j, n, k, s, r, q, ii, jj, p = dd.p;
462:   PetscInt     xloc, yloc, zloc, xyloc, xyzloc;

465:   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466:   PetscCalloc1(p + 1, &glldata->zGL);

468:   glldata->zGL[0] = -1.0;
469:   glldata->zGL[p] = 1.0;
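  /* The p-1 interior GLL nodes are the zeros of P_p' and coincide with the Gauss-Jacobi(1,1)
     quadrature nodes: they are computed as eigenvalues of the corresponding symmetric
     tridiagonal Jacobi matrix with LAPACKsteqr */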
470:   if (p > 1) {
471:     if (p == 2) glldata->zGL[1] = 0.0;
472:     else {
473:       PetscMalloc1(p - 1, &M);
474:       for (i = 0; i < p - 1; i++) {
475:         si   = (PetscReal)(i + 1.0);
476:         M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477:       }
478:       pm1 = p - 1;
479:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
480:       PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
482:       PetscFPTrapPop();
483:       PetscFree(M);
484:     }
485:   }

487:   /* Weights for 1D quadrature */
488:   PetscMalloc1(p + 1, &glldata->rhoGL);

490:   glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491:   glldata->rhoGL[p] = glldata->rhoGL[0];
492:   z2                = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
493:   for (i = 1; i < p; i++) {
494:     x  = glldata->zGL[i];
495:     z0 = 1.0;
496:     z1 = x;
497:     for (n = 1; n < p; n++) {
498:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499:       z0 = z1;
500:       z1 = z2;
501:     }
502:     glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503:   }

505:   /* Auxiliary matrix for the Laplacian */
506:   PetscMalloc1(p + 1, &glldata->A);
507:   PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]);
508:   for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;

510:   for (j = 1; j < p; j++) {
511:     x  = glldata->zGL[j];
512:     z0 = 1.0;
513:     z1 = x;
514:     for (n = 1; n < p; n++) {
515:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516:       z0 = z1;
517:       z1 = z2;
518:     }
519:     Lpj = z2;
520:     for (r = 1; r < p; r++) {
521:       if (r == j) {
522:         glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523:       } else {
524:         x  = glldata->zGL[r];
525:         z0 = 1.0;
526:         z1 = x;
527:         for (n = 1; n < p; n++) {
528:           z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529:           z0 = z1;
530:           z1 = z2;
531:         }
532:         Lpr              = z2;
533:         glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534:       }
535:     }
536:   }
537:   for (j = 1; j < p + 1; j++) {
538:     x  = glldata->zGL[j];
539:     z0 = 1.0;
540:     z1 = x;
541:     for (n = 1; n < p; n++) {
542:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543:       z0 = z1;
544:       z1 = z2;
545:     }
546:     Lpj              = z2;
547:     glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548:     glldata->A[0][j] = glldata->A[j][0];
549:   }
550:   for (j = 0; j < p; j++) {
551:     x  = glldata->zGL[j];
552:     z0 = 1.0;
553:     z1 = x;
554:     for (n = 1; n < p; n++) {
555:       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556:       z0 = z1;
557:       z1 = z2;
558:     }
559:     Lpj = z2;

561:     glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562:     glldata->A[j][p] = glldata->A[p][j];
563:   }
564:   glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565:   glldata->A[p][p] = glldata->A[0][0];

567:   /* compute element matrix */
568:   xloc = p + 1;
569:   yloc = p + 1;
570:   zloc = p + 1;
571:   if (dd.dim < 2) yloc = 1;
572:   if (dd.dim < 3) zloc = 1;
573:   xyloc  = xloc * yloc;
574:   xyzloc = xloc * yloc * zloc;

576:   MatCreate(PETSC_COMM_SELF, &glldata->elem_mat);
577:   MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc);
578:   MatSetType(glldata->elem_mat, MATSEQAIJ);
579:   MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL); /* overestimated */
580:   MatZeroEntries(glldata->elem_mat);
581:   MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE);
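  /* Fill the element matrix as a sum of tensor products of the 1D stiffness matrix A with the
     diagonal 1D GLL mass matrix diag(rhoGL): for each dof, only the entries along its x, y and
     z lines of GLL nodes are nonzero */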

583:   for (k = 0; k < zloc; k++) {
584:     if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585:     else rhoGLk = 1.0;

587:     for (j = 0; j < yloc; j++) {
588:       if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589:       else rhoGLj = 1.0;

591:       for (i = 0; i < xloc; i++) {
592:         ii = k * xyloc + j * xloc + i;
593:         s  = k;
594:         r  = j;
595:         for (q = 0; q < xloc; q++) {
596:           jj = s * xyloc + r * xloc + q;
597:           MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES);
598:         }
599:         if (dd.dim > 1) {
600:           s = k;
601:           q = i;
602:           for (r = 0; r < yloc; r++) {
603:             jj = s * xyloc + r * xloc + q;
604:             MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES);
605:           }
606:         }
607:         if (dd.dim > 2) {
608:           r = j;
609:           q = i;
610:           for (s = 0; s < zloc; s++) {
611:             jj = s * xyloc + r * xloc + q;
612:             MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES);
613:           }
614:         }
615:       }
616:     }
617:   }
618:   MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY);
619:   MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY);
620: #if DEBUG
621:   {
622:     Vec       lvec, rvec;
623:     PetscReal norm;
624:     MatCreateVecs(glldata->elem_mat, &lvec, &rvec);
625:     VecSet(lvec, 1.0);
626:     MatMult(glldata->elem_mat, lvec, rvec);
627:     VecNorm(rvec, NORM_INFINITY, &norm);
628:     VecDestroy(&lvec);
629:     VecDestroy(&rvec);
630:   }
631: #endif
632:   return 0;
633: }

635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637:   PetscMPIInt rank;
638:   PetscInt    i, j, k;

641:   /* Subdomain index in Cartesian coordinates */
642:   MPI_Comm_rank(dd->gcomm, &rank);
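  /* ranks are laid out lexicographically: rank = ipx + npx * (ipy + npy * ipz) */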
643:   dd->ipx = rank % dd->npx;
644:   if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645:   else dd->ipz = 0;

647:   dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648:   /* number of local elements */
649:   dd->nex_l = dd->nex / dd->npx;
650:   if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651:   if (dd->dim > 1) {
652:     dd->ney_l = dd->ney / dd->npy;
653:     if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654:   } else {
655:     dd->ney_l = 0;
656:   }
657:   if (dd->dim > 2) {
658:     dd->nez_l = dd->nez / dd->npz;
659:     if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660:   } else {
661:     dd->nez_l = 0;
662:   }
663:   /* local and global number of dofs */
664:   dd->xm_l = dd->nex_l * dd->p + 1;
665:   dd->xm   = dd->nex * dd->p + 1;
666:   dd->ym_l = dd->ney_l * dd->p + 1;
667:   dd->ym   = dd->ney * dd->p + 1;
668:   dd->zm_l = dd->nez_l * dd->p + 1;
669:   dd->zm   = dd->nez * dd->p + 1;
670:   if (!dd->pure_neumann) {
671:     if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672:     if (!dd->DBC_zerorows) dd->xm--;
673:   }

675:   /* starting global index for local dofs (simple lexicographic order) */
676:   dd->startx = 0;
677:   j          = dd->nex / dd->npx;
678:   for (i = 0; i < dd->ipx; i++) {
679:     k = j;
680:     if (i < dd->nex % dd->npx) k++;
681:     dd->startx = dd->startx + k * dd->p;
682:   }
683:   if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

685:   dd->starty = 0;
686:   if (dd->dim > 1) {
687:     j = dd->ney / dd->npy;
688:     for (i = 0; i < dd->ipy; i++) {
689:       k = j;
690:       if (i < dd->ney % dd->npy) k++;
691:       dd->starty = dd->starty + k * dd->p;
692:     }
693:   }
694:   dd->startz = 0;
695:   if (dd->dim > 2) {
696:     j = dd->nez / dd->npz;
697:     for (i = 0; i < dd->ipz; i++) {
698:       k = j;
699:       if (i < dd->nez % dd->npz) k++;
700:       dd->startz = dd->startz + k * dd->p;
701:     }
702:   }
703:   return 0;
704: }
705: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
706: {
707:   GLLData                gll;
708:   Mat                    local_mat = 0, temp_A = 0;
709:   ISLocalToGlobalMapping matis_map   = 0;
710:   IS                     dirichletIS = 0;

713:   /* Compute Gauss-Lobatto-Legendre quadrature data (nodes, weights and element matrix) */
714:   GLLStuffs(dd, &gll);
715:   /* Compute matrix of subdomain Neumann problem */
716:   ComputeSubdomainMatrix(dd, gll, &local_mat);
717:   /* Compute global mapping of local dofs */
718:   ComputeMapping(dd, &matis_map);
719:   /* Create MATIS object needed by BDDC */
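  /* A MATIS stores the unassembled subdomain matrices together with their local-to-global maps
     instead of a fully assembled parallel matrix; this is the format PCBDDC operates on */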
720:   MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A);
721:   /* Set local subdomain matrices into MATIS object */
722:   MatScale(local_mat, dd.scalingfactor);
723:   MatISSetLocalMat(temp_A, local_mat);
724:   /* Call assembly functions */
725:   MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY);
726:   MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY);

728:   if (dd.DBC_zerorows) {
729:     PetscInt dirsize;

731:     ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL);
732:     MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
733:     MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL);
734:     ISGetLocalSize(dirichletIS, &dirsize);
735:     ISDestroy(&dirichletIS);
736:   }

738:   /* giving hints to the local and global matrices can be useful for BDDC */
739:   MatSetOption(local_mat, MAT_SPD, PETSC_TRUE);
740:   MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE);
741: #if DEBUG
742:   {
743:     Vec       lvec, rvec;
744:     PetscReal norm;
745:     MatCreateVecs(temp_A, &lvec, &rvec);
746:     VecSet(lvec, 1.0);
747:     MatMult(temp_A, lvec, rvec);
748:     VecNorm(rvec, NORM_INFINITY, &norm);
749:     VecDestroy(&lvec);
750:     VecDestroy(&rvec);
751:   }
752: #endif
753:   /* free allocated workspace */
754:   PetscFree(gll.zGL);
755:   PetscFree(gll.rhoGL);
756:   PetscFree(gll.A[0]);
757:   PetscFree(gll.A);
758:   MatDestroy(&gll.elem_mat);
759:   MatDestroy(&local_mat);
760:   ISLocalToGlobalMappingDestroy(&matis_map);
761:   /* give back the pointer to the MATIS object */
762:   *A = temp_A;
763:   return 0;
764: }

766: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
767: {
768:   KSP temp_ksp;
769:   PC  pc;

772:   KSPGetPC(ksp_bddc, &pc);
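  /* Two ways of solving with FETI-DP: either build the FETI-DP flux operator F and its Dirichlet
     preconditioner D explicitly from the BDDC preconditioner, or use the all-in-one KSPFETIDP
     solver wrapping the same inner BDDC (selected at runtime with -testkspfetidp) */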
773:   if (!dd.testkspfetidp) {
774:     PC  D;
775:     Mat F;

777:     PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D);
778:     KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp);
779:     KSPSetOperators(temp_ksp, F, F);
780:     KSPSetType(temp_ksp, KSPCG);
781:     KSPSetPC(temp_ksp, D);
782:     KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
783:     KSPSetOptionsPrefix(temp_ksp, "fluxes_");
784:     KSPSetFromOptions(temp_ksp);
785:     KSPSetUp(temp_ksp);
786:     MatDestroy(&F);
787:     PCDestroy(&D);
788:   } else {
789:     Mat A, Ap;

791:     KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp);
792:     KSPGetOperators(ksp_bddc, &A, &Ap);
793:     KSPSetOperators(temp_ksp, A, Ap);
794:     KSPSetOptionsPrefix(temp_ksp, "fluxes_");
795:     KSPSetType(temp_ksp, KSPFETIDP);
796:     KSPFETIDPSetInnerBDDC(temp_ksp, pc);
797:     KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
798:     KSPSetFromOptions(temp_ksp);
799:     KSPSetUp(temp_ksp);
800:   }
801:   *ksp_fetidp = temp_ksp;
802:   return 0;
803: }

805: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
806: {
807:   KSP          temp_ksp;
808:   PC           pc;
809:   IS           primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
810:   PetscInt     vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
811:   MatNullSpace near_null_space;

814:   KSPCreate(dd.gcomm, &temp_ksp);
815:   KSPSetOperators(temp_ksp, A, A);
816:   KSPSetType(temp_ksp, KSPCG);
817:   KSPGetPC(temp_ksp, &pc);
818:   PCSetType(pc, PCBDDC);

820:   localsize = dd.xm_l * dd.ym_l * dd.zm_l;

822:   /* BDDC customization */

824:   /* jumping coefficients case */
825:   PCISSetSubdomainScalingFactor(pc, dd.scalingfactor);

827:   /* Dofs splitting:
828:      a simple stride-1 IS.
829:      It is not strictly needed since, by default, PCBDDC assumes a stride-1 split */
830:   PetscMalloc1(1, &bddc_dofs_splitting);
831: #if 1
832:   ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]);
833:   PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting);
834: #else
835:   /* examples for global ordering */

837:   /* each process lists the nodes it owns */
838:   PetscInt sr, er;
839:   MatGetOwnershipRange(A, &sr, &er);
840:   ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]);
841:   PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting);
842:   /* Split can be passed in a more general way since any process can list any node */
843: #endif
844:   ISDestroy(&bddc_dofs_splitting[0]);
845:   PetscFree(bddc_dofs_splitting);

847:   /* Primal constraints are implemented by attaching a near null space to A; here only the constants are passed in
848:     (which in practice is not needed, since by default PCBDDC builds the primal space using constants for the quadrature formulas) */
849: #if 0
850:   Vec vecs[2];
851:   PetscRandom rctx;
852:   MatCreateVecs(A,&vecs[0],&vecs[1]);
853:   PetscRandomCreate(dd.gcomm,&rctx);
854:   VecSetRandom(vecs[0],rctx);
855:   VecSetRandom(vecs[1],rctx);
856:   MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);
857:   VecDestroy(&vecs[0]);
858:   VecDestroy(&vecs[1]);
859:   PetscRandomDestroy(&rctx);
860: #else
861:   MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space);
862: #endif
863:   MatSetNearNullSpace(A, near_null_space);
864:   MatNullSpaceDestroy(&near_null_space);

866:   /* CSR graph of subdomain dofs */
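  /* PCBDDC uses the local adjacency graph to split the interface dofs into connected components
     (faces, edges and vertices) when building its primal space */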
867:   BuildCSRGraph(dd, &xadj, &adjncy);
868:   PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER);

870:   /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
871:   vidx[0] = 0 * dd.xm_l + 0;
872:   vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
873:   vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
874:   vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
875:   vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
876:   vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
877:   vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
878:   vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
879:   ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals);
880:   PCBDDCSetPrimalVerticesLocalIS(pc, primals);
881:   ISDestroy(&primals);

883:   /* Neumann/Dirichlet indices on the global boundary */
884:   if (dd.DBC_zerorows) {
885:     /* Dirichlet indices need to be passed to PCBDDC only when the corresponding rows of the matrix have been eliminated with a zero-rows function */
886:     ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS);
887:     PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
888:     PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS);
889:   } else {
890:     if (dd.pure_neumann) {
891:       /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
892:       ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS);
893:       PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
894:     } else {
895:       /* It would be wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
896:       /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
897:       ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS);
898:       PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
899:     }
900:   }

902:   /* Pass local null space information to local matrices (needed when using approximate local solvers) */
903:   if (dd.ipx || dd.pure_neumann) {
904:     MatNullSpace nsp;
905:     Mat          local_mat;

907:     MatISGetLocalMat(A, &local_mat);
908:     MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp);
909:     MatSetNullSpace(local_mat, nsp);
910:     MatNullSpaceDestroy(&nsp);
911:   }
912:   KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
913:   KSPSetOptionsPrefix(temp_ksp, "physical_");
914:   KSPSetFromOptions(temp_ksp);
915:   KSPSetUp(temp_ksp);
916:   *ksp = temp_ksp;
917:   ISDestroy(&dirichletIS);
918:   ISDestroy(&neumannIS);
919:   return 0;
920: }

922: static PetscErrorCode InitializeDomainData(DomainData *dd)
923: {
924:   PetscMPIInt sizes, rank;
925:   PetscInt    factor;

928:   dd->gcomm = PETSC_COMM_WORLD;
929:   MPI_Comm_size(dd->gcomm, &sizes);
930:   MPI_Comm_rank(dd->gcomm, &rank);
931:   /* Get information from command line */
932:   /* Processors/subdomains per dimension */
933:   /* Default is a 1D problem */
934:   dd->npx = sizes;
935:   dd->npy = 0;
936:   dd->npz = 0;
937:   dd->dim = 1;
938:   PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL);
939:   PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL);
940:   PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL);
941:   if (dd->npy) dd->dim++;
942:   if (dd->npz) dd->dim++;
943:   /* Number of elements per dimension */
944:   /* Default is one element per subdomain */
945:   dd->nex = dd->npx;
946:   dd->ney = dd->npy;
947:   dd->nez = dd->npz;
948:   PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL);
949:   PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL);
950:   PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL);
951:   if (!dd->npy) {
952:     dd->ney = 0;
953:     dd->nez = 0;
954:   }
955:   if (!dd->npz) dd->nez = 0;
956:   /* Spectral degree */
957:   dd->p = 3;
958:   PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL);
959:   /* pure Neumann problem? */
960:   dd->pure_neumann = PETSC_FALSE;
961:   PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL);

963:   /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
964:   dd->DBC_zerorows = PETSC_FALSE;

966:   PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL);
967:   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
968:   dd->scalingfactor = 1.0;

970:   factor = 0;
971:   PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL);
972:   /* checkerboard coefficient pattern: each rank scales its subdomain matrix by 10^jump or 10^(-jump) depending on the rank parity */
973:   dd->scalingfactor = PetscPowScalar(10.0, (PetscScalar)factor * PetscPowScalar(-1.0, (PetscScalar)rank));
974:   /* test data passed in */
975:   if (dd->dim == 1) {
976:     PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater than number of procs per dim");
978:   } else if (dd->dim == 2) {
979:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater than number of procs per dim");
981:   } else {
982:     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater than number of procs per dim");
984:   }
985:   return 0;
986: }

988: int main(int argc, char **args)
989: {
990:   DomainData         dd;
991:   PetscReal          norm, maxeig, mineig;
992:   PetscScalar        scalar_value;
993:   PetscInt           ndofs, its;
994:   Mat                A = NULL, F = NULL;
995:   KSP                KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
996:   KSPConvergedReason reason;
997:   Vec                exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
998:   PetscBool          testfetidp = PETSC_TRUE;

1000:   /* Init PETSc */
1002:   PetscInitialize(&argc, &args, (char *)0, help);
1003:   /* Initialize DomainData */
1004:   InitializeDomainData(&dd);
1005:   /* Decompose domain */
1006:   DomainDecomposition(&dd);
1007: #if DEBUG
1008:   printf("Subdomain data\n");
1009:   printf("IPS   : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1010:   printf("NEG   : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1011:   printf("NEL   : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1012:   printf("LDO   : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1013:   printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1014:   printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1015: #endif
1016:   dd.testkspfetidp = PETSC_TRUE;
1017:   PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL);
1018:   PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL);
1019:   /* assemble global matrix */
1020:   ComputeMatrix(dd, &A);
1021:   /* get work vectors */
1022:   MatCreateVecs(A, &bddc_solution, NULL);
1023:   VecDuplicate(bddc_solution, &bddc_rhs);
1024:   VecDuplicate(bddc_solution, &exact_solution);
1025:   /* create and customize KSP/PC for BDDC */
1026:   ComputeKSPBDDC(dd, A, &KSPwithBDDC);
1027:   /* create KSP/PC for FETIDP */
1028:   if (testfetidp) ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP);
1029:     /* create the exact solution (random for real scalars, constant for complex builds) */
1030: #if defined(PETSC_USE_COMPLEX)
1031:   VecSet(exact_solution, 1.0 + PETSC_i);
1032: #else
1033:   VecSetRandom(exact_solution, NULL);
1034: #endif
1035:   VecShift(exact_solution, -0.5);
1036:   VecScale(exact_solution, 100.0);
1037:   VecGetSize(exact_solution, &ndofs);
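  /* in the pure Neumann case the solution is defined only up to a constant, so remove the mean
     from the exact solution (and later from the computed ones) before comparing */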
1038:   if (dd.pure_neumann) {
1039:     VecSum(exact_solution, &scalar_value);
1040:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1041:     VecShift(exact_solution, scalar_value);
1042:   }
1043:   /* assemble BDDC rhs */
1044:   MatMult(A, exact_solution, bddc_rhs);
1045:   /* test ksp with BDDC */
1046:   KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution);
1047:   KSPGetIterationNumber(KSPwithBDDC, &its);
1048:   KSPGetConvergedReason(KSPwithBDDC, &reason);
1049:   KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig);
1050:   if (dd.pure_neumann) {
1051:     VecSum(bddc_solution, &scalar_value);
1052:     scalar_value = -scalar_value / (PetscScalar)ndofs;
1053:     VecShift(bddc_solution, scalar_value);
1054:   }
1055:   /* check exact_solution against the BDDC solution */
1056:   VecAXPY(bddc_solution, -1.0, exact_solution);
1057:   VecNorm(bddc_solution, NORM_INFINITY, &norm);
1058:   PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n");
1059:   PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs);
1060:   if (reason < 0) {
1061:     PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its);
1062:     PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]);
1063:   }
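  /* snap an estimated minimal eigenvalue close to 1 back to 1 (and round the printed bounds),
     presumably to keep the reference output stable across runs */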
1064:   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1065:   PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.);
1066:   if (norm > 1.e-1 || reason < 0) PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm);
1067:   PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n");
1068:   if (testfetidp) {
1069:     Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;

1071:     VecDuplicate(bddc_solution, &fetidp_solution_all);
1072:     if (!dd.testkspfetidp) {
1073:       /* assemble fetidp rhs on the space of Lagrange multipliers */
1074:       KSPGetOperators(KSPwithFETIDP, &F, NULL);
1075:       MatCreateVecs(F, &fetidp_solution, &fetidp_rhs);
1076:       PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs);
1077:       VecSet(fetidp_solution, 0.0);
1078:       /* test ksp with FETIDP */
1079:       KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution);
1080:       /* assemble fetidp solution on physical domain */
1081:       PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all);
1082:     } else {
1083:       KSP kspF;
1084:       KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all);
1085:       KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF);
1086:       KSPGetOperators(kspF, &F, NULL);
1087:     }
1088:     MatGetSize(F, &ndofs, NULL);
1089:     KSPGetIterationNumber(KSPwithFETIDP, &its);
1090:     KSPGetConvergedReason(KSPwithFETIDP, &reason);
1091:     KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig);
1092:     /* check the FETI-DP solution */
1093:     if (dd.pure_neumann) {
1094:       VecSum(fetidp_solution_all, &scalar_value);
1095:       scalar_value = -scalar_value / (PetscScalar)ndofs;
1096:       VecShift(fetidp_solution_all, scalar_value);
1097:     }
1098:     VecAXPY(fetidp_solution_all, -1.0, exact_solution);
1099:     VecNorm(fetidp_solution_all, NORM_INFINITY, &norm);
1100:     PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n");
1101:     PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs);
1102:     if (reason < 0) {
1103:       PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its);
1104:       PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]);
1105:     }
1106:     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1107:     PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.);
1108:     if (norm > 1.e-1 || reason < 0) PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm);
1109:     PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n");
1110:     VecDestroy(&fetidp_solution);
1111:     VecDestroy(&fetidp_solution_all);
1112:     VecDestroy(&fetidp_rhs);
1113:   }
1114:   KSPDestroy(&KSPwithFETIDP);
1115:   VecDestroy(&exact_solution);
1116:   VecDestroy(&bddc_solution);
1117:   VecDestroy(&bddc_rhs);
1118:   MatDestroy(&A);
1119:   KSPDestroy(&KSPwithBDDC);
1120:   /* Quit PETSc */
1121:   PetscFinalize();
1122:   return 0;
1123: }

1125: /*TEST

1127:  testset:
1128:    nsize: 4
1129:    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1130:    output_file: output/ex59_bddc_fetidp_1.out
1131:    test:
1132:      suffix: bddc_fetidp_1
1133:    test:
1134:      requires: viennacl
1135:      suffix: bddc_fetidp_1_viennacl
1136:      args: -subdomain_mat_type aijviennacl
1137:    test:
1138:      requires: cuda
1139:      suffix: bddc_fetidp_1_cuda
1140:      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

1142:  testset:
1143:    nsize: 4
1144:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1145:    requires: !single
1146:    test:
1147:      suffix: bddc_fetidp_2
1148:    test:
1149:      suffix: bddc_fetidp_3
1150:      args: -npz 1 -nez 1
1151:    test:
1152:      suffix: bddc_fetidp_4
1153:      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

1155:  testset:
1156:    nsize: 8
1157:    suffix: bddc_fetidp_approximate
1158:    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

1160:  testset:
1161:    nsize: 4
1162:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1163:    filter: grep -v "variant HERMITIAN"
1164:    requires: !single
1165:    test:
1166:      suffix: bddc_fetidp_ml_1
1167:      args: -physical_pc_bddc_coarsening_ratio 1
1168:    test:
1169:      suffix: bddc_fetidp_ml_2
1170:      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1171:    test:
1172:      suffix: bddc_fetidp_ml_3
1173:      args: -physical_pc_bddc_coarsening_ratio 4

1175:  testset:
1176:    nsize: 9
1177:    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1178:    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1179:    test:
1180:      suffix: bddc_fetidp_ml_eqlimit_1
1181:      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1182:    test:
1183:      suffix: bddc_fetidp_ml_eqlimit_2
1184:      args: -physical_pc_bddc_coarse_eqs_limit 46

1186: TEST*/