Actual source code: ex59.c
1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2: Discrete system: 1D, 2D or 3D Laplacian, discretized with spectral elements.\n\
3: Spectral degree can be specified by passing values to the -p option.\n\
4: Global problem has either Dirichlet boundary conditions on one side or pure Neumann boundary conditions (depending on runtime parameters).\n\
5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
6: Example usage: \n\
7: 1D: mpiexec -n 4 ex59 -nex 7\n\
8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10: Subdomain decomposition can be specified with -np_ parameters.\n\
11: Dirichlet boundaries on one side by default:\n\
12: it does not iterate on Dirichlet nodes by default; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13: Pure Neumann case can be requested by passing in -pureneumann.\n\
14: In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
16: #include <petscksp.h>
17: #include <petscpc.h>
18: #include <petscdm.h>
19: #include <petscdmda.h>
20: #include <petscblaslapack.h>
21: #define DEBUG 0
23: /* structure holding domain data */
24: typedef struct {
25: /* communicator */
26: MPI_Comm gcomm;
27: /* space dimension */
28: PetscInt dim;
29: /* spectral degree */
30: PetscInt p;
31: /* subdomains per dimension */
32: PetscInt npx, npy, npz;
33: /* subdomain index in cartesian dimensions */
34: PetscInt ipx, ipy, ipz;
35: /* elements per dimension */
36: PetscInt nex, ney, nez;
37: /* local elements per dimension */
38: PetscInt nex_l, ney_l, nez_l;
39: /* global number of dofs per dimension */
40: PetscInt xm, ym, zm;
41: /* local number of dofs per dimension */
42: PetscInt xm_l, ym_l, zm_l;
43: /* starting global indexes for subdomain in lexicographic ordering */
44: PetscInt startx, starty, startz;
45: /* pure Neumann problem */
46: PetscBool pure_neumann;
47: /* Dirichlet BC implementation */
48: PetscBool DBC_zerorows;
49: /* Scaling factor for subdomain */
50: PetscScalar scalingfactor;
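/* use KSPFETIDP instead of assembling the FETI-DP operators from the BDDC preconditioner */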
51: PetscBool testkspfetidp;
52: } DomainData;
54: /* structure holding GLL data */
55: typedef struct {
56: /* GLL nodes */
57: PetscReal *zGL;
58: /* GLL weights */
59: PetscScalar *rhoGL;
60: /* aux_mat */
61: PetscScalar **A;
62: /* Element matrix */
63: Mat elem_mat;
64: } GLLData;
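/* Build, in CSR format, the adjacency graph of local dofs that is later passed to PCBDDCSetLocalAdjacencyGraph:
   dofs on the boundary of the local box are connected to all dofs in their stencil neighborhood,
   while interior dofs are connected only to themselves */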
66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67: {
68: PetscInt *xadj_temp, *adjncy_temp;
69: PetscInt i, j, k, ii, jj, kk, iindex, count_adj;
70: PetscInt istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
71: PetscBool internal_node;
73: /* first count dimension of adjncy */
74: PetscFunctionBeginUser;
75: count_adj = 0;
76: for (k = 0; k < dd.zm_l; k++) {
77: internal_node = PETSC_TRUE;
78: kstart_csr = k > 0 ? k - 1 : k;
79: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
80: if (k == 0 || k == dd.zm_l - 1) {
81: internal_node = PETSC_FALSE;
82: kstart_csr = k;
83: kend_csr = k + 1;
84: }
85: for (j = 0; j < dd.ym_l; j++) {
86: jstart_csr = j > 0 ? j - 1 : j;
87: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
88: if (j == 0 || j == dd.ym_l - 1) {
89: internal_node = PETSC_FALSE;
90: jstart_csr = j;
91: jend_csr = j + 1;
92: }
93: for (i = 0; i < dd.xm_l; i++) {
94: istart_csr = i > 0 ? i - 1 : i;
95: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
96: if (i == 0 || i == dd.xm_l - 1) {
97: internal_node = PETSC_FALSE;
98: istart_csr = i;
99: iend_csr = i + 1;
100: }
101: if (internal_node) {
102: istart_csr = i;
103: iend_csr = i + 1;
104: jstart_csr = j;
105: jend_csr = j + 1;
106: kstart_csr = k;
107: kend_csr = k + 1;
108: }
109: for (kk = kstart_csr; kk < kend_csr; kk++) {
110: for (jj = jstart_csr; jj < jend_csr; jj++) {
111: for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112: }
113: }
114: }
115: }
116: }
117: PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118: PetscCall(PetscMalloc1(count_adj, &adjncy_temp));
120: /* now fill CSR data structure */
121: count_adj = 0;
122: for (k = 0; k < dd.zm_l; k++) {
123: internal_node = PETSC_TRUE;
124: kstart_csr = k > 0 ? k - 1 : k;
125: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
126: if (k == 0 || k == dd.zm_l - 1) {
127: internal_node = PETSC_FALSE;
128: kstart_csr = k;
129: kend_csr = k + 1;
130: }
131: for (j = 0; j < dd.ym_l; j++) {
132: jstart_csr = j > 0 ? j - 1 : j;
133: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
134: if (j == 0 || j == dd.ym_l - 1) {
135: internal_node = PETSC_FALSE;
136: jstart_csr = j;
137: jend_csr = j + 1;
138: }
139: for (i = 0; i < dd.xm_l; i++) {
140: istart_csr = i > 0 ? i - 1 : i;
141: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
142: if (i == 0 || i == dd.xm_l - 1) {
143: internal_node = PETSC_FALSE;
144: istart_csr = i;
145: iend_csr = i + 1;
146: }
147: iindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148: xadj_temp[iindex] = count_adj;
149: if (internal_node) {
150: istart_csr = i;
151: iend_csr = i + 1;
152: jstart_csr = j;
153: jend_csr = j + 1;
154: kstart_csr = k;
155: kend_csr = k + 1;
156: }
157: for (kk = kstart_csr; kk < kend_csr; kk++) {
158: for (jj = jstart_csr; jj < jend_csr; jj++) {
159: for (ii = istart_csr; ii < iend_csr; ii++) {
160: iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;
162: adjncy_temp[count_adj] = iindex;
163: count_adj = count_adj + 1;
164: }
165: }
166: }
167: }
168: }
169: }
170: xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;
172: *xadj = xadj_temp;
173: *adjncy = adjncy_temp;
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179: IS temp_dirichlet = 0, temp_neumann = 0;
180: PetscInt localsize, i, j, k, *indices;
181: PetscBool *touched;
183: PetscFunctionBeginUser;
184: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
186: PetscCall(PetscMalloc1(localsize, &indices));
187: PetscCall(PetscMalloc1(localsize, &touched));
188: for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
190: if (dirichlet) {
191: i = 0;
192: /* west boundary */
193: if (dd.ipx == 0) {
194: for (k = 0; k < dd.zm_l; k++) {
195: for (j = 0; j < dd.ym_l; j++) {
196: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197: touched[indices[i]] = PETSC_TRUE;
198: i++;
199: }
200: }
201: }
202: PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203: }
204: if (neumann) {
205: i = 0;
206: /* west boundary */
207: if (dd.ipx == 0) {
208: for (k = 0; k < dd.zm_l; k++) {
209: for (j = 0; j < dd.ym_l; j++) {
210: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211: if (!touched[indices[i]]) {
212: touched[indices[i]] = PETSC_TRUE;
213: i++;
214: }
215: }
216: }
217: }
218: /* east boundary */
219: if (dd.ipx == dd.npx - 1) {
220: for (k = 0; k < dd.zm_l; k++) {
221: for (j = 0; j < dd.ym_l; j++) {
222: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223: if (!touched[indices[i]]) {
224: touched[indices[i]] = PETSC_TRUE;
225: i++;
226: }
227: }
228: }
229: }
230: /* south boundary */
231: if (dd.ipy == 0 && dd.dim > 1) {
232: for (k = 0; k < dd.zm_l; k++) {
233: for (j = 0; j < dd.xm_l; j++) {
234: indices[i] = k * dd.ym_l * dd.xm_l + j;
235: if (!touched[indices[i]]) {
236: touched[indices[i]] = PETSC_TRUE;
237: i++;
238: }
239: }
240: }
241: }
242: /* north boundary */
243: if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244: for (k = 0; k < dd.zm_l; k++) {
245: for (j = 0; j < dd.xm_l; j++) {
246: indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247: if (!touched[indices[i]]) {
248: touched[indices[i]] = PETSC_TRUE;
249: i++;
250: }
251: }
252: }
253: }
254: /* bottom boundary */
255: if (dd.ipz == 0 && dd.dim > 2) {
256: for (k = 0; k < dd.ym_l; k++) {
257: for (j = 0; j < dd.xm_l; j++) {
258: indices[i] = k * dd.xm_l + j;
259: if (!touched[indices[i]]) {
260: touched[indices[i]] = PETSC_TRUE;
261: i++;
262: }
263: }
264: }
265: }
266: /* top boundary */
267: if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268: for (k = 0; k < dd.ym_l; k++) {
269: for (j = 0; j < dd.xm_l; j++) {
270: indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271: if (!touched[indices[i]]) {
272: touched[indices[i]] = PETSC_TRUE;
273: i++;
274: }
275: }
276: }
277: }
278: PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279: }
280: if (dirichlet) *dirichlet = temp_dirichlet;
281: if (neumann) *neumann = temp_neumann;
282: PetscCall(PetscFree(indices));
283: PetscCall(PetscFree(touched));
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289: DM da;
290: AO ao;
291: DMBoundaryType bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292: DMDAStencilType stype = DMDA_STENCIL_BOX;
293: ISLocalToGlobalMapping temp_isg2lmap;
294: PetscInt i, j, k, ig, jg, kg, lindex, gindex, localsize;
295: PetscInt *global_indices;
297: PetscFunctionBeginUser;
298: /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299: just to illustrate the creation of a MATIS object */
300: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301: PetscCall(PetscMalloc1(localsize, &global_indices));
302: for (k = 0; k < dd.zm_l; k++) {
303: kg = dd.startz + k;
304: for (j = 0; j < dd.ym_l; j++) {
305: jg = dd.starty + j;
306: for (i = 0; i < dd.xm_l; i++) {
307: ig = dd.startx + i;
308: lindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309: gindex = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310: global_indices[lindex] = gindex;
311: }
312: }
313: }
314: if (dd.dim == 3) {
315: PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316: } else if (dd.dim == 2) {
317: PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318: } else {
319: PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320: }
321: PetscCall(DMSetFromOptions(da));
322: PetscCall(DMSetUp(da));
323: PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324: PetscCall(DMDAGetAO(da, &ao));
325: PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
326: PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327: PetscCall(DMDestroy(&da));
328: *isg2lmap = temp_isg2lmap;
329: PetscFunctionReturn(PETSC_SUCCESS);
330: }
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333: PetscInt localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334: PetscInt ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335: PetscInt *indexg, *cols, *colsg;
336: PetscScalar *vals;
337: Mat temp_local_mat, elem_mat_DBC = 0, *usedmat;
338: IS submatIS;
340: PetscFunctionBeginUser;
341: PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342: PetscCall(PetscMalloc1(i, &indexg));
343: PetscCall(PetscMalloc1(i, &colsg));
344: /* get the submatrix of elem_mat without the Dirichlet nodes */
345: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346: xloc = dd.p + 1;
347: yloc = 1;
348: zloc = 1;
349: if (dd.dim > 1) yloc = dd.p + 1;
350: if (dd.dim > 2) zloc = dd.p + 1;
351: ii = 0;
352: for (k = 0; k < zloc; k++) {
353: for (j = 0; j < yloc; j++) {
354: for (i = 1; i < xloc; i++) {
355: indexg[ii] = k * xloc * yloc + j * xloc + i;
356: ii++;
357: }
358: }
359: }
360: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361: PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362: PetscCall(ISDestroy(&submatIS));
363: }
365: /* Assemble subdomain matrix */
366: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367: PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368: PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369: PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
370: /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
371: /* in order to avoid conversions inside the BDDC code, use SEQAIJ if possible */
372: if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373: PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374: } else {
375: PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376: }
377: PetscCall(MatSetFromOptions(temp_local_mat));
379: i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
381: PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL)); /* very overestimated */
382: PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383: PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
384: PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
386: yloc = dd.p + 1;
387: zloc = dd.p + 1;
388: if (dd.dim < 3) zloc = 1;
389: if (dd.dim < 2) yloc = 1;
391: auxnez = dd.nez_l;
392: auxney = dd.ney_l;
393: auxnex = dd.nex_l;
394: if (dd.dim < 3) auxnez = 1;
395: if (dd.dim < 2) auxney = 1;
397: for (ke = 0; ke < auxnez; ke++) {
398: for (je = 0; je < auxney; je++) {
399: for (ie = 0; ie < auxnex; ie++) {
400: /* customize element accounting for BC */
401: xloc = dd.p + 1;
402: ming = 0;
403: usedmat = &glldata.elem_mat;
404: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405: if (ie == 0) {
406: xloc = dd.p;
407: usedmat = &elem_mat_DBC;
408: } else {
409: ming = -1;
410: usedmat = &glldata.elem_mat;
411: }
412: }
413: /* local to the element/global to the subdomain indexing */
414: for (k = 0; k < zloc; k++) {
415: kg = ke * dd.p + k;
416: for (j = 0; j < yloc; j++) {
417: jg = je * dd.p + j;
418: for (i = 0; i < xloc; i++) {
419: ig = ie * dd.p + i + ming;
420: ii = k * xloc * yloc + j * xloc + i;
421: indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422: }
423: }
424: }
425: /* Set values */
426: for (i = 0; i < xloc * yloc * zloc; i++) {
427: PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428: for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429: PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430: PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431: }
432: }
433: }
434: }
435: PetscCall(PetscFree(indexg));
436: PetscCall(PetscFree(colsg));
437: PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438: PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439: #if DEBUG
440: {
441: Vec lvec, rvec;
442: PetscReal norm;
443: PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444: PetscCall(VecSet(lvec, 1.0));
445: PetscCall(MatMult(temp_local_mat, lvec, rvec));
446: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447: PetscCall(VecDestroy(&lvec));
448: PetscCall(VecDestroy(&rvec));
449: }
450: #endif
451: *local_mat = temp_local_mat;
452: PetscCall(MatDestroy(&elem_mat_DBC));
453: PetscFunctionReturn(PETSC_SUCCESS);
454: }
456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458: PetscReal *M, si;
459: PetscScalar x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460: PetscBLASInt pm1, lierr;
461: PetscInt i, j, n, k, s, r, q, ii, jj, p = dd.p;
462: PetscInt xloc, yloc, zloc, xyloc, xyzloc;
464: PetscFunctionBeginUser;
465: /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466: PetscCall(PetscCalloc1(p + 1, &glldata->zGL));
468: glldata->zGL[0] = -1.0;
469: glldata->zGL[p] = 1.0;
470: if (p > 1) {
471: if (p == 2) glldata->zGL[1] = 0.0;
472: else {
473: PetscCall(PetscMalloc1(p - 1, &M));
474: for (i = 0; i < p - 1; i++) {
475: si = (PetscReal)(i + 1.0);
476: M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477: }
478: pm1 = p - 1;
479: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480: PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
481: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR LAPACK routine %d", (int)lierr);
482: PetscCall(PetscFPTrapPop());
483: PetscCall(PetscFree(M));
484: }
485: }
487: /* Weights for 1D quadrature */
488: PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));
490: glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491: glldata->rhoGL[p] = glldata->rhoGL[0];
492: z2 = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
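/* interior weights: rhoGL[i] = 2/(p*(p+1)*P_p(zGL[i])^2), with the Legendre polynomial P_p evaluated by the three-term recurrence below */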
493: for (i = 1; i < p; i++) {
494: x = glldata->zGL[i];
495: z0 = 1.0;
496: z1 = x;
497: for (n = 1; n < p; n++) {
498: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499: z0 = z1;
500: z1 = z2;
501: }
502: glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503: }
505: /* Auxiliary mat for laplacian */
506: PetscCall(PetscMalloc1(p + 1, &glldata->A));
507: PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508: for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;
510: for (j = 1; j < p; j++) {
511: x = glldata->zGL[j];
512: z0 = 1.0;
513: z1 = x;
514: for (n = 1; n < p; n++) {
515: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516: z0 = z1;
517: z1 = z2;
518: }
519: Lpj = z2;
520: for (r = 1; r < p; r++) {
521: if (r == j) {
522: glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523: } else {
524: x = glldata->zGL[r];
525: z0 = 1.0;
526: z1 = x;
527: for (n = 1; n < p; n++) {
528: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529: z0 = z1;
530: z1 = z2;
531: }
532: Lpr = z2;
533: glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534: }
535: }
536: }
537: for (j = 1; j < p + 1; j++) {
538: x = glldata->zGL[j];
539: z0 = 1.0;
540: z1 = x;
541: for (n = 1; n < p; n++) {
542: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543: z0 = z1;
544: z1 = z2;
545: }
546: Lpj = z2;
547: glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548: glldata->A[0][j] = glldata->A[j][0];
549: }
550: for (j = 0; j < p; j++) {
551: x = glldata->zGL[j];
552: z0 = 1.0;
553: z1 = x;
554: for (n = 1; n < p; n++) {
555: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556: z0 = z1;
557: z1 = z2;
558: }
559: Lpj = z2;
561: glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562: glldata->A[j][p] = glldata->A[p][j];
563: }
564: glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565: glldata->A[p][p] = glldata->A[0][0];
567: /* compute element matrix */
568: xloc = p + 1;
569: yloc = p + 1;
570: zloc = p + 1;
571: if (dd.dim < 2) yloc = 1;
572: if (dd.dim < 3) zloc = 1;
573: xyloc = xloc * yloc;
574: xyzloc = xloc * yloc * zloc;
576: PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577: PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578: PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579: PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580: PetscCall(MatZeroEntries(glldata->elem_mat));
581: PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
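/* exploit the tensor-product structure of the spectral element: each dof (i,j,k) is coupled along every coordinate direction
   through the 1D stiffness matrix A, scaled by the GLL quadrature weights of the remaining directions */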
583: for (k = 0; k < zloc; k++) {
584: if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585: else rhoGLk = 1.0;
587: for (j = 0; j < yloc; j++) {
588: if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589: else rhoGLj = 1.0;
591: for (i = 0; i < xloc; i++) {
592: ii = k * xyloc + j * xloc + i;
593: s = k;
594: r = j;
595: for (q = 0; q < xloc; q++) {
596: jj = s * xyloc + r * xloc + q;
597: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598: }
599: if (dd.dim > 1) {
600: s = k;
601: q = i;
602: for (r = 0; r < yloc; r++) {
603: jj = s * xyloc + r * xloc + q;
604: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605: }
606: }
607: if (dd.dim > 2) {
608: r = j;
609: q = i;
610: for (s = 0; s < zloc; s++) {
611: jj = s * xyloc + r * xloc + q;
612: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613: }
614: }
615: }
616: }
617: }
618: PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619: PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620: #if DEBUG
621: {
622: Vec lvec, rvec;
623: PetscReal norm;
624: PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625: PetscCall(VecSet(lvec, 1.0));
626: PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628: PetscCall(VecDestroy(&lvec));
629: PetscCall(VecDestroy(&rvec));
630: }
631: #endif
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637: PetscMPIInt rank;
638: PetscInt i, j, k;
640: PetscFunctionBeginUser;
641: /* Subdomain index in cartesian coordinates */
642: PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
643: dd->ipx = rank % dd->npx;
644: if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645: else dd->ipz = 0;
647: dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648: /* number of local elements */
649: dd->nex_l = dd->nex / dd->npx;
650: if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651: if (dd->dim > 1) {
652: dd->ney_l = dd->ney / dd->npy;
653: if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654: } else {
655: dd->ney_l = 0;
656: }
657: if (dd->dim > 2) {
658: dd->nez_l = dd->nez / dd->npz;
659: if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660: } else {
661: dd->nez_l = 0;
662: }
663: /* local and global number of dofs */
664: dd->xm_l = dd->nex_l * dd->p + 1;
665: dd->xm = dd->nex * dd->p + 1;
666: dd->ym_l = dd->ney_l * dd->p + 1;
667: dd->ym = dd->ney * dd->p + 1;
668: dd->zm_l = dd->nez_l * dd->p + 1;
669: dd->zm = dd->nez * dd->p + 1;
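/* if the Dirichlet dof is eliminated from the linear system (default, i.e. without -usezerorows),
   remove the west-side dof from the local and global counts */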
670: if (!dd->pure_neumann) {
671: if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672: if (!dd->DBC_zerorows) dd->xm--;
673: }
675: /* starting global index for local dofs (simple lexicographic order) */
676: dd->startx = 0;
677: j = dd->nex / dd->npx;
678: for (i = 0; i < dd->ipx; i++) {
679: k = j;
680: if (i < dd->nex % dd->npx) k++;
681: dd->startx = dd->startx + k * dd->p;
682: }
683: if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
685: dd->starty = 0;
686: if (dd->dim > 1) {
687: j = dd->ney / dd->npy;
688: for (i = 0; i < dd->ipy; i++) {
689: k = j;
690: if (i < dd->ney % dd->npy) k++;
691: dd->starty = dd->starty + k * dd->p;
692: }
693: }
694: dd->startz = 0;
695: if (dd->dim > 2) {
696: j = dd->nez / dd->npz;
697: for (i = 0; i < dd->ipz; i++) {
698: k = j;
699: if (i < dd->nez % dd->npz) k++;
700: dd->startz = dd->startz + k * dd->p;
701: }
702: }
703: PetscFunctionReturn(PETSC_SUCCESS);
704: }
705: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
706: {
707: GLLData gll;
708: Mat local_mat = 0, temp_A = 0;
709: ISLocalToGlobalMapping matis_map = 0;
710: IS dirichletIS = 0;
712: PetscFunctionBeginUser;
713: /* Compute data for the Gauss-Lobatto-Legendre quadrature rule */
714: PetscCall(GLLStuffs(dd, &gll));
715: /* Compute matrix of subdomain Neumann problem */
716: PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
717: /* Compute global mapping of local dofs */
718: PetscCall(ComputeMapping(dd, &matis_map));
719: /* Create MATIS object needed by BDDC */
720: PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
721: /* Set local subdomain matrices into MATIS object */
722: PetscCall(MatScale(local_mat, dd.scalingfactor));
723: PetscCall(MatISSetLocalMat(temp_A, local_mat));
724: /* Call assembly functions */
725: PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
726: PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));
728: if (dd.DBC_zerorows) {
729: PetscInt dirsize;
731: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
732: PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
733: PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
734: PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
735: PetscCall(ISDestroy(&dirichletIS));
736: }
738: /* giving hints to the local and global matrices can be useful for BDDC */
739: PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
740: PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
741: #if DEBUG
742: {
743: Vec lvec, rvec;
744: PetscReal norm;
745: PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
746: PetscCall(VecSet(lvec, 1.0));
747: PetscCall(MatMult(temp_A, lvec, rvec));
748: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
749: PetscCall(VecDestroy(&lvec));
750: PetscCall(VecDestroy(&rvec));
751: }
752: #endif
753: /* free allocated workspace */
754: PetscCall(PetscFree(gll.zGL));
755: PetscCall(PetscFree(gll.rhoGL));
756: PetscCall(PetscFree(gll.A[0]));
757: PetscCall(PetscFree(gll.A));
758: PetscCall(MatDestroy(&gll.elem_mat));
759: PetscCall(MatDestroy(&local_mat));
760: PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
761: /* give back the pointer to the MATIS object */
762: *A = temp_A;
763: PetscFunctionReturn(PETSC_SUCCESS);
764: }
766: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
767: {
768: KSP temp_ksp;
769: PC pc;
771: PetscFunctionBeginUser;
772: PetscCall(KSPGetPC(ksp_bddc, &pc));
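/* two ways of building the FETI-DP solver: either extract the FETI-DP operators from the BDDC preconditioner
   and set up the KSP for the fluxes by hand, or use KSPFETIDP with the PCBDDC object as its inner preconditioner */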
773: if (!dd.testkspfetidp) {
774: PC D;
775: Mat F;
777: PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
778: PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
779: PetscCall(KSPSetOperators(temp_ksp, F, F));
780: PetscCall(KSPSetType(temp_ksp, KSPCG));
781: PetscCall(KSPSetPC(temp_ksp, D));
782: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
783: PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
784: PetscCall(KSPSetFromOptions(temp_ksp));
785: PetscCall(KSPSetUp(temp_ksp));
786: PetscCall(MatDestroy(&F));
787: PetscCall(PCDestroy(&D));
788: } else {
789: Mat A, Ap;
791: PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
792: PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
793: PetscCall(KSPSetOperators(temp_ksp, A, Ap));
794: PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
795: PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
796: PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
797: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
798: PetscCall(KSPSetFromOptions(temp_ksp));
799: PetscCall(KSPSetUp(temp_ksp));
800: }
801: *ksp_fetidp = temp_ksp;
802: PetscFunctionReturn(PETSC_SUCCESS);
803: }
805: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
806: {
807: KSP temp_ksp;
808: PC pc;
809: IS primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
810: PetscInt vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
811: MatNullSpace near_null_space;
813: PetscFunctionBeginUser;
814: PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
815: PetscCall(KSPSetOperators(temp_ksp, A, A));
816: PetscCall(KSPSetType(temp_ksp, KSPCG));
817: PetscCall(KSPGetPC(temp_ksp, &pc));
818: PetscCall(PCSetType(pc, PCBDDC));
820: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
822: /* BDDC customization */
824: /* jumping coefficients case */
825: PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));
827: /* Dofs splitting
828: Simple stride-1 IS
829: It is not needed since, by default, PCBDDC assumes a stride-1 split */
830: PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
831: #if 1
832: PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
833: PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
834: #else
835: /* examples for global ordering */
837: /* each process lists the nodes it owns */
838: PetscInt sr, er;
839: PetscCall(MatGetOwnershipRange(A, &sr, &er));
840: PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
841: PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
842: /* Split can be passed in a more general way since any process can list any node */
843: #endif
844: PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
845: PetscCall(PetscFree(bddc_dofs_splitting));
847: /* Primal constraints implemented by attaching a near null space to A -> here only the constants are passed in
848: (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
849: #if 0
850: Vec vecs[2];
851: PetscRandom rctx;
852: PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
853: PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
854: PetscCall(VecSetRandom(vecs[0],rctx));
855: PetscCall(VecSetRandom(vecs[1],rctx));
856: PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
857: PetscCall(VecDestroy(&vecs[0]));
858: PetscCall(VecDestroy(&vecs[1]));
859: PetscCall(PetscRandomDestroy(&rctx));
860: #else
861: PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
862: #endif
863: PetscCall(MatSetNearNullSpace(A, near_null_space));
864: PetscCall(MatNullSpaceDestroy(&near_null_space));
866: /* CSR graph of subdomain dofs */
867: PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
868: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));
870: /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
871: vidx[0] = 0 * dd.xm_l + 0;
872: vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
873: vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
874: vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
875: vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
876: vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
877: vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
878: vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
879: PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
880: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
881: PetscCall(ISDestroy(&primals));
883: /* Neumann/Dirichlet indices on the global boundary */
884: if (dd.DBC_zerorows) {
885: /* Only if you eliminate some rows of the matrix with the zerorows function do you need to set Dirichlet indices in the PCBDDC data */
886: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
887: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
888: PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
889: } else {
890: if (dd.pure_neumann) {
891: /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
892: PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
893: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
894: } else {
895: /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
896: /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
897: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
898: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
899: }
900: }
902: /* Pass local null space information to local matrices (needed when using approximate local solvers) */
903: if (dd.ipx || dd.pure_neumann) {
904: MatNullSpace nsp;
905: Mat local_mat;
907: PetscCall(MatISGetLocalMat(A, &local_mat));
908: PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
909: PetscCall(MatSetNullSpace(local_mat, nsp));
910: PetscCall(MatNullSpaceDestroy(&nsp));
911: }
912: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
913: PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
914: PetscCall(KSPSetFromOptions(temp_ksp));
915: PetscCall(KSPSetUp(temp_ksp));
916: *ksp = temp_ksp;
917: PetscCall(ISDestroy(&dirichletIS));
918: PetscCall(ISDestroy(&neumannIS));
919: PetscFunctionReturn(PETSC_SUCCESS);
920: }
922: static PetscErrorCode InitializeDomainData(DomainData *dd)
923: {
924: PetscMPIInt sizes, rank;
925: PetscInt factor;
927: PetscFunctionBeginUser;
928: dd->gcomm = PETSC_COMM_WORLD;
929: PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
930: PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
931: /* Get information from command line */
932: /* Processors/subdomains per dimension */
933: /* Default is 1d problem */
934: dd->npx = sizes;
935: dd->npy = 0;
936: dd->npz = 0;
937: dd->dim = 1;
938: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
939: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
940: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
941: if (dd->npy) dd->dim++;
942: if (dd->npz) dd->dim++;
943: /* Number of elements per dimension */
944: /* Default is one element per subdomain */
945: dd->nex = dd->npx;
946: dd->ney = dd->npy;
947: dd->nez = dd->npz;
948: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
949: PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
950: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
951: if (!dd->npy) {
952: dd->ney = 0;
953: dd->nez = 0;
954: }
955: if (!dd->npz) dd->nez = 0;
956: /* Spectral degree */
957: dd->p = 3;
958: PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
959: /* pure neumann problem? */
960: dd->pure_neumann = PETSC_FALSE;
961: PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));
963: /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
964: dd->DBC_zerorows = PETSC_FALSE;
966: PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
967: if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
968: dd->scalingfactor = 1.0;
970: factor = 0;
971: PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
972: /* checkerboard pattern: the scaling factor alternates between 10^jump and 10^-jump with the parity of the rank */
973: dd->scalingfactor = PetscPowScalar(10.0, (PetscScalar)factor * PetscPowScalar(-1.0, (PetscScalar)rank));
974: /* test data passed in */
975: if (dd->dim == 1) {
976: PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
977: PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
978: } else if (dd->dim == 2) {
979: PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
980: PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
981: } else {
982: PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
983: PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
984: }
985: PetscFunctionReturn(PETSC_SUCCESS);
986: }
988: int main(int argc, char **args)
989: {
990: DomainData dd;
991: PetscReal norm, maxeig, mineig;
992: PetscScalar scalar_value;
993: PetscInt ndofs, its;
994: Mat A = NULL, F = NULL;
995: KSP KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
996: KSPConvergedReason reason;
997: Vec exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
998: PetscBool testfetidp = PETSC_TRUE;
1000: /* Init PETSc */
1001: PetscFunctionBeginUser;
1002: PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
1003: /* Initialize DomainData */
1004: PetscCall(InitializeDomainData(&dd));
1005: /* Decompose domain */
1006: PetscCall(DomainDecomposition(&dd));
1007: #if DEBUG
1008: printf("Subdomain data\n");
1009: printf("IPS : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1010: printf("NEG : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1011: printf("NEL : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1012: printf("LDO : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1013: printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1014: printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1015: #endif
1016: dd.testkspfetidp = PETSC_TRUE;
1017: PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1018: PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1019: /* assemble global matrix */
1020: PetscCall(ComputeMatrix(dd, &A));
1021: /* get work vectors */
1022: PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1023: PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1024: PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1025: /* create and customize KSP/PC for BDDC */
1026: PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1027: /* create KSP/PC for FETIDP */
1028: if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1029: /* create random exact solution */
1030: #if defined(PETSC_USE_COMPLEX)
1031: PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1032: #else
1033: PetscCall(VecSetRandom(exact_solution, NULL));
1034: #endif
1035: PetscCall(VecShift(exact_solution, -0.5));
1036: PetscCall(VecScale(exact_solution, 100.0));
1037: PetscCall(VecGetSize(exact_solution, &ndofs));
1038: if (dd.pure_neumann) {
1039: PetscCall(VecSum(exact_solution, &scalar_value));
1040: scalar_value = -scalar_value / (PetscScalar)ndofs;
1041: PetscCall(VecShift(exact_solution, scalar_value));
1042: }
1043: /* assemble BDDC rhs */
1044: PetscCall(MatMult(A, exact_solution, bddc_rhs));
1045: /* test ksp with BDDC */
1046: PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1047: PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1048: PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1049: PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1050: if (dd.pure_neumann) {
1051: PetscCall(VecSum(bddc_solution, &scalar_value));
1052: scalar_value = -scalar_value / (PetscScalar)ndofs;
1053: PetscCall(VecShift(bddc_solution, scalar_value));
1054: }
1055: /* check exact_solution and BDDC solution */
1056: PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1057: PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1058: PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1059: PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1060: if (reason < 0) {
1061: PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1062: PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1063: }
1064: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1065: PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1066: if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1067: PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1068: if (testfetidp) {
1069: Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;
1071: PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1072: if (!dd.testkspfetidp) {
1073: /* assemble fetidp rhs on the space of Lagrange multipliers */
1074: PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1075: PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1076: PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1077: PetscCall(VecSet(fetidp_solution, 0.0));
1078: /* test ksp with FETIDP */
1079: PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1080: /* assemble fetidp solution on physical domain */
1081: PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1082: } else {
1083: KSP kspF;
1084: PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1085: PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1086: PetscCall(KSPGetOperators(kspF, &F, NULL));
1087: }
1088: PetscCall(MatGetSize(F, &ndofs, NULL));
1089: PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1090: PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1091: PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1092: /* check the FETI-DP solution */
1093: if (dd.pure_neumann) {
1094: PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1095: scalar_value = -scalar_value / (PetscScalar)ndofs;
1096: PetscCall(VecShift(fetidp_solution_all, scalar_value));
1097: }
1098: PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1099: PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1100: PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1101: PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1102: if (reason < 0) {
1103: PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1104: PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1105: }
1106: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1107: PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1108: if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1109: PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1110: PetscCall(VecDestroy(&fetidp_solution));
1111: PetscCall(VecDestroy(&fetidp_solution_all));
1112: PetscCall(VecDestroy(&fetidp_rhs));
1113: }
1114: PetscCall(KSPDestroy(&KSPwithFETIDP));
1115: PetscCall(VecDestroy(&exact_solution));
1116: PetscCall(VecDestroy(&bddc_solution));
1117: PetscCall(VecDestroy(&bddc_rhs));
1118: PetscCall(MatDestroy(&A));
1119: PetscCall(KSPDestroy(&KSPwithBDDC));
1120: /* Quit PETSc */
1121: PetscCall(PetscFinalize());
1122: return 0;
1123: }
1125: /*TEST
1127: testset:
1128: nsize: 4
1129: args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1130: output_file: output/ex59_bddc_fetidp_1.out
1131: test:
1132: suffix: bddc_fetidp_1
1133: test:
1134: requires: viennacl
1135: suffix: bddc_fetidp_1_viennacl
1136: args: -subdomain_mat_type aijviennacl
1137: test:
1138: requires: cuda
1139: suffix: bddc_fetidp_1_cuda
1140: args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1142: testset:
1143: nsize: 4
1144: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1145: requires: !single
1146: test:
1147: suffix: bddc_fetidp_2
1148: test:
1149: suffix: bddc_fetidp_3
1150: args: -npz 1 -nez 1
1151: test:
1152: suffix: bddc_fetidp_4
1153: args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1155: testset:
1156: nsize: 8
1157: suffix: bddc_fetidp_approximate
1158: args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1160: testset:
1161: nsize: 4
1162: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1163: filter: grep -v "variant HERMITIAN"
1164: requires: !single
1165: test:
1166: suffix: bddc_fetidp_ml_1
1167: args: -physical_pc_bddc_coarsening_ratio 1
1168: test:
1169: suffix: bddc_fetidp_ml_2
1170: args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1171: test:
1172: suffix: bddc_fetidp_ml_3
1173: args: -physical_pc_bddc_coarsening_ratio 4
1175: testset:
1176: nsize: 9
1177: args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1178: output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1179: test:
1180: suffix: bddc_fetidp_ml_eqlimit_1
1181: args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1182: test:
1183: suffix: bddc_fetidp_ml_eqlimit_2
1184: args: -physical_pc_bddc_coarse_eqs_limit 46
1186: TEST*/