Actual source code: ex59.c
1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2: Discrete system: 1D, 2D or 3D Laplacian, discretized with spectral elements.\n\
3: The spectral degree can be specified by passing a value to the -p option.\n\
4: The global problem has either Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
6: Example usage: \n\
7: 1D: mpiexec -n 4 ex59 -nex 7\n\
8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10: Subdomain decomposition can be specified with -np_ parameters.\n\
11: Dirichlet boundaries are imposed on one side by default:\n\
12: the solver does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13: The pure Neumann case can be requested by passing in -pureneumann.\n\
14: In the latter case, to avoid runtime errors during factorization, also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
16: #include <petscksp.h>
17: #include <petscpc.h>
18: #include <petscdm.h>
19: #include <petscdmda.h>
20: #include <petscblaslapack.h>
21: #define DEBUG 0
23: /* structure holding domain data */
24: typedef struct {
25: /* communicator */
26: MPI_Comm gcomm;
27: /* space dimension */
28: PetscInt dim;
29: /* spectral degree */
30: PetscInt p;
31: /* subdomains per dimension */
32: PetscInt npx, npy, npz;
33: /* subdomain index in cartesian dimensions */
34: PetscInt ipx, ipy, ipz;
35: /* elements per dimension */
36: PetscInt nex, ney, nez;
37: /* local elements per dimension */
38: PetscInt nex_l, ney_l, nez_l;
39: /* global number of dofs per dimension */
40: PetscInt xm, ym, zm;
41: /* local number of dofs per dimension */
42: PetscInt xm_l, ym_l, zm_l;
43: /* starting global indexes for subdomain in lexicographic ordering */
44: PetscInt startx, starty, startz;
45: /* pure Neumann problem */
46: PetscBool pure_neumann;
47: /* Dirichlet BC implementation */
48: PetscBool DBC_zerorows;
49: /* Scaling factor for subdomain */
50: PetscScalar scalingfactor;
51: PetscBool testkspfetidp;
52: } DomainData;
54: /* structure holding GLL data */
55: typedef struct {
56: /* GLL nodes */
57: PetscReal *zGL;
58: /* GLL weights */
59: PetscScalar *rhoGL;
60: /* aux_mat */
61: PetscScalar **A;
62: /* Element matrix */
63: Mat elem_mat;
64: } GLLData;
66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67: {
68: PetscInt *xadj_temp, *adjncy_temp;
69: PetscInt i, j, k, ii, jj, kk, iindex, count_adj;
70: PetscInt istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
71: PetscBool internal_node;
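/* Build the local dof connectivity graph in CSR format (xadj/adjncy): each dof is linked
   to its neighbors in a box stencil clipped at the subdomain boundary. The graph is built
   in two passes (count, then fill) and is later passed to PCBDDCSetLocalAdjacencyGraph()
   in ComputeKSPBDDC(). */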
73: /* first count dimension of adjncy */
74: PetscFunctionBeginUser;
75: count_adj = 0;
76: for (k = 0; k < dd.zm_l; k++) {
77: internal_node = PETSC_TRUE;
78: kstart_csr = k > 0 ? k - 1 : k;
79: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
80: if (k == 0 || k == dd.zm_l - 1) {
81: internal_node = PETSC_FALSE;
82: kstart_csr = k;
83: kend_csr = k + 1;
84: }
85: for (j = 0; j < dd.ym_l; j++) {
86: jstart_csr = j > 0 ? j - 1 : j;
87: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
88: if (j == 0 || j == dd.ym_l - 1) {
89: internal_node = PETSC_FALSE;
90: jstart_csr = j;
91: jend_csr = j + 1;
92: }
93: for (i = 0; i < dd.xm_l; i++) {
94: istart_csr = i > 0 ? i - 1 : i;
95: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
96: if (i == 0 || i == dd.xm_l - 1) {
97: internal_node = PETSC_FALSE;
98: istart_csr = i;
99: iend_csr = i + 1;
100: }
101: if (internal_node) {
102: istart_csr = i;
103: iend_csr = i + 1;
104: jstart_csr = j;
105: jend_csr = j + 1;
106: kstart_csr = k;
107: kend_csr = k + 1;
108: }
109: for (kk = kstart_csr; kk < kend_csr; kk++) {
110: for (jj = jstart_csr; jj < jend_csr; jj++) {
111: for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112: }
113: }
114: }
115: }
116: }
117: PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118: PetscCall(PetscMalloc1(count_adj, &adjncy_temp));
120: /* now fill CSR data structure */
121: count_adj = 0;
122: for (k = 0; k < dd.zm_l; k++) {
123: internal_node = PETSC_TRUE;
124: kstart_csr = k > 0 ? k - 1 : k;
125: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
126: if (k == 0 || k == dd.zm_l - 1) {
127: internal_node = PETSC_FALSE;
128: kstart_csr = k;
129: kend_csr = k + 1;
130: }
131: for (j = 0; j < dd.ym_l; j++) {
132: jstart_csr = j > 0 ? j - 1 : j;
133: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
134: if (j == 0 || j == dd.ym_l - 1) {
135: internal_node = PETSC_FALSE;
136: jstart_csr = j;
137: jend_csr = j + 1;
138: }
139: for (i = 0; i < dd.xm_l; i++) {
140: istart_csr = i > 0 ? i - 1 : i;
141: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
142: if (i == 0 || i == dd.xm_l - 1) {
143: internal_node = PETSC_FALSE;
144: istart_csr = i;
145: iend_csr = i + 1;
146: }
147: iindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148: xadj_temp[iindex] = count_adj;
149: if (internal_node) {
150: istart_csr = i;
151: iend_csr = i + 1;
152: jstart_csr = j;
153: jend_csr = j + 1;
154: kstart_csr = k;
155: kend_csr = k + 1;
156: }
157: for (kk = kstart_csr; kk < kend_csr; kk++) {
158: for (jj = jstart_csr; jj < jend_csr; jj++) {
159: for (ii = istart_csr; ii < iend_csr; ii++) {
160: iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;
162: adjncy_temp[count_adj] = iindex;
163: count_adj = count_adj + 1;
164: }
165: }
166: }
167: }
168: }
169: }
170: xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;
172: *xadj = xadj_temp;
173: *adjncy = adjncy_temp;
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179: IS temp_dirichlet = 0, temp_neumann = 0;
180: PetscInt localsize, i, j, k, *indices;
181: PetscBool *touched;
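/* Collect the local (lexicographic) indices of subdomain dofs lying on the global boundary:
   the Dirichlet set (if requested) contains the west face, while the Neumann set contains the
   remaining boundary faces (plus the west face when the Dirichlet set is not requested);
   the touched[] array avoids listing a dof twice where faces meet (edges and corners). */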
183: PetscFunctionBeginUser;
184: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
186: PetscCall(PetscMalloc1(localsize, &indices));
187: PetscCall(PetscMalloc1(localsize, &touched));
188: for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
190: if (dirichlet) {
191: i = 0;
192: /* west boundary */
193: if (dd.ipx == 0) {
194: for (k = 0; k < dd.zm_l; k++) {
195: for (j = 0; j < dd.ym_l; j++) {
196: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197: touched[indices[i]] = PETSC_TRUE;
198: i++;
199: }
200: }
201: }
202: PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203: }
204: if (neumann) {
205: i = 0;
206: /* west boundary */
207: if (dd.ipx == 0) {
208: for (k = 0; k < dd.zm_l; k++) {
209: for (j = 0; j < dd.ym_l; j++) {
210: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211: if (!touched[indices[i]]) {
212: touched[indices[i]] = PETSC_TRUE;
213: i++;
214: }
215: }
216: }
217: }
218: /* east boundary */
219: if (dd.ipx == dd.npx - 1) {
220: for (k = 0; k < dd.zm_l; k++) {
221: for (j = 0; j < dd.ym_l; j++) {
222: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223: if (!touched[indices[i]]) {
224: touched[indices[i]] = PETSC_TRUE;
225: i++;
226: }
227: }
228: }
229: }
230: /* south boundary */
231: if (dd.ipy == 0 && dd.dim > 1) {
232: for (k = 0; k < dd.zm_l; k++) {
233: for (j = 0; j < dd.xm_l; j++) {
234: indices[i] = k * dd.ym_l * dd.xm_l + j;
235: if (!touched[indices[i]]) {
236: touched[indices[i]] = PETSC_TRUE;
237: i++;
238: }
239: }
240: }
241: }
242: /* north boundary */
243: if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244: for (k = 0; k < dd.zm_l; k++) {
245: for (j = 0; j < dd.xm_l; j++) {
246: indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247: if (!touched[indices[i]]) {
248: touched[indices[i]] = PETSC_TRUE;
249: i++;
250: }
251: }
252: }
253: }
254: /* bottom boundary */
255: if (dd.ipz == 0 && dd.dim > 2) {
256: for (k = 0; k < dd.ym_l; k++) {
257: for (j = 0; j < dd.xm_l; j++) {
258: indices[i] = k * dd.xm_l + j;
259: if (!touched[indices[i]]) {
260: touched[indices[i]] = PETSC_TRUE;
261: i++;
262: }
263: }
264: }
265: }
266: /* top boundary */
267: if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268: for (k = 0; k < dd.ym_l; k++) {
269: for (j = 0; j < dd.xm_l; j++) {
270: indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271: if (!touched[indices[i]]) {
272: touched[indices[i]] = PETSC_TRUE;
273: i++;
274: }
275: }
276: }
277: }
278: PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279: }
280: if (dirichlet) *dirichlet = temp_dirichlet;
281: if (neumann) *neumann = temp_neumann;
282: PetscCall(PetscFree(indices));
283: PetscCall(PetscFree(touched));
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289: DM da;
290: AO ao;
291: DMBoundaryType bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292: DMDAStencilType stype = DMDA_STENCIL_BOX;
293: ISLocalToGlobalMapping temp_isg2lmap;
294: PetscInt i, j, k, ig, jg, kg, lindex, gindex, localsize;
295: PetscInt *global_indices;
297: PetscFunctionBeginUser;
298: /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299: just to illustrate the creation of a MATIS object */
300: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301: PetscCall(PetscMalloc1(localsize, &global_indices));
302: for (k = 0; k < dd.zm_l; k++) {
303: kg = dd.startz + k;
304: for (j = 0; j < dd.ym_l; j++) {
305: jg = dd.starty + j;
306: for (i = 0; i < dd.xm_l; i++) {
307: ig = dd.startx + i;
308: lindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309: gindex = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310: global_indices[lindex] = gindex;
311: }
312: }
313: }
314: if (dd.dim == 3) {
315: PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316: } else if (dd.dim == 2) {
317: PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318: } else {
319: PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320: }
321: PetscCall(DMSetFromOptions(da));
322: PetscCall(DMSetUp(da));
323: PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324: PetscCall(DMDAGetAO(da, &ao));
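/* convert the natural lexicographic global numbering computed above into the PETSc (DMDA) parallel ordering */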
325: PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
326: PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327: PetscCall(DMDestroy(&da));
328: *isg2lmap = temp_isg2lmap;
329: PetscFunctionReturn(PETSC_SUCCESS);
330: }
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333: PetscInt localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334: PetscInt ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335: PetscInt *indexg, *cols, *colsg;
336: PetscScalar *vals;
337: Mat temp_local_mat, elem_mat_DBC = 0, *usedmat;
338: IS submatIS;
340: PetscFunctionBeginUser;
341: PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342: PetscCall(PetscMalloc1(i, &indexg));
343: PetscCall(PetscMalloc1(i, &colsg));
344: /* get submatrix of elem_mat without Dirichlet nodes */
345: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346: xloc = dd.p + 1;
347: yloc = 1;
348: zloc = 1;
349: if (dd.dim > 1) yloc = dd.p + 1;
350: if (dd.dim > 2) zloc = dd.p + 1;
351: ii = 0;
352: for (k = 0; k < zloc; k++) {
353: for (j = 0; j < yloc; j++) {
354: for (i = 1; i < xloc; i++) {
355: indexg[ii] = k * xloc * yloc + j * xloc + i;
356: ii++;
357: }
358: }
359: }
360: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361: PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362: PetscCall(ISDestroy(&submatIS));
363: }
365: /* Assemble subdomain matrix */
366: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367: PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368: PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369: PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
370: /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
371: /* to avoid conversions inside the BDDC code, use SEQAIJ if possible */
372: if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373: PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374: } else {
375: PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376: }
377: PetscCall(MatSetFromOptions(temp_local_mat));
379: i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
381: PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL)); /* very overestimated */
382: PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383: PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
384: PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
386: yloc = dd.p + 1;
387: zloc = dd.p + 1;
388: if (dd.dim < 3) zloc = 1;
389: if (dd.dim < 2) yloc = 1;
391: auxnez = dd.nez_l;
392: auxney = dd.ney_l;
393: auxnex = dd.nex_l;
394: if (dd.dim < 3) auxnez = 1;
395: if (dd.dim < 2) auxney = 1;
397: for (ke = 0; ke < auxnez; ke++) {
398: for (je = 0; je < auxney; je++) {
399: for (ie = 0; ie < auxnex; ie++) {
400: /* customize element accounting for BC */
401: xloc = dd.p + 1;
402: ming = 0;
403: usedmat = &glldata.elem_mat;
404: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405: if (ie == 0) {
406: xloc = dd.p;
407: usedmat = &elem_mat_DBC;
408: } else {
409: ming = -1;
410: usedmat = &glldata.elem_mat;
411: }
412: }
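/* when the west Dirichlet dofs have been removed from the subdomain (the reduced matrix
   elem_mat_DBC is used for the first element), the dofs of the remaining elements are
   shifted left by one, hence ming = -1 */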
413: /* local to the element/global to the subdomain indexing */
414: for (k = 0; k < zloc; k++) {
415: kg = ke * dd.p + k;
416: for (j = 0; j < yloc; j++) {
417: jg = je * dd.p + j;
418: for (i = 0; i < xloc; i++) {
419: ig = ie * dd.p + i + ming;
420: ii = k * xloc * yloc + j * xloc + i;
421: indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422: }
423: }
424: }
425: /* Set values */
426: for (i = 0; i < xloc * yloc * zloc; i++) {
427: PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428: for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429: PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430: PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431: }
432: }
433: }
434: }
435: PetscCall(PetscFree(indexg));
436: PetscCall(PetscFree(colsg));
437: PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438: PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439: #if DEBUG
440: {
441: Vec lvec, rvec;
442: PetscReal norm;
443: PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444: PetscCall(VecSet(lvec, 1.0));
445: PetscCall(MatMult(temp_local_mat, lvec, rvec));
446: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447: PetscCall(VecDestroy(&lvec));
448: PetscCall(VecDestroy(&rvec));
449: }
450: #endif
451: *local_mat = temp_local_mat;
452: PetscCall(MatDestroy(&elem_mat_DBC));
453: PetscFunctionReturn(PETSC_SUCCESS);
454: }
456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458: PetscReal *M, si;
459: PetscScalar x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460: PetscBLASInt pm1, lierr;
461: PetscInt i, j, n, k, s, r, q, ii, jj, p = dd.p;
462: PetscInt xloc, yloc, zloc, xyloc, xyzloc;
464: PetscFunctionBeginUser;
465: /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466: PetscCall(PetscCalloc1(p + 1, &glldata->zGL));
468: glldata->zGL[0] = -1.0;
469: glldata->zGL[p] = 1.0;
470: if (p > 1) {
471: if (p == 2) glldata->zGL[1] = 0.0;
472: else {
473: PetscCall(PetscMalloc1(p - 1, &M));
474: for (i = 0; i < p - 1; i++) {
475: si = (PetscReal)(i + 1.0);
476: M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477: }
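/* the interior GLL nodes are the roots of the derivative of the Legendre polynomial of degree p;
   they are computed as eigenvalues of a symmetric tridiagonal matrix with zero diagonal and
   off-diagonal entries M (Golub-Welsch-like approach), using LAPACK's steqr */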
478: pm1 = (PetscBLASInt)(p - 1);
479: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480: PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
481: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR LAPACK routine %d", (int)lierr);
482: PetscCall(PetscFPTrapPop());
483: PetscCall(PetscFree(M));
484: }
485: }
487: /* Weights for 1D quadrature */
488: PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));
490: glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491: glldata->rhoGL[p] = glldata->rhoGL[0];
492: z2 = -1; /* Dummy value to avoid -Wmaybe-initialized */
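/* for each interior node, evaluate the Legendre polynomial L_p at the node via the three-term
   recurrence (n+1) L_{n+1}(x) = (2n+1) x L_n(x) - n L_{n-1}(x); the corresponding GLL weight
   is rho_i = 2 / (p (p+1) L_p(z_i)^2) */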
493: for (i = 1; i < p; i++) {
494: x = glldata->zGL[i];
495: z0 = 1.0;
496: z1 = x;
497: for (n = 1; n < p; n++) {
498: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499: z0 = z1;
500: z1 = z2;
501: }
502: glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503: }
505: /* Auxiliary matrix for the Laplacian */
506: PetscCall(PetscMalloc1(p + 1, &glldata->A));
507: PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508: for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;
510: for (j = 1; j < p; j++) {
511: x = glldata->zGL[j];
512: z0 = 1.0;
513: z1 = x;
514: for (n = 1; n < p; n++) {
515: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516: z0 = z1;
517: z1 = z2;
518: }
519: Lpj = z2;
520: for (r = 1; r < p; r++) {
521: if (r == j) {
522: glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523: } else {
524: x = glldata->zGL[r];
525: z0 = 1.0;
526: z1 = x;
527: for (n = 1; n < p; n++) {
528: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529: z0 = z1;
530: z1 = z2;
531: }
532: Lpr = z2;
533: glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534: }
535: }
536: }
537: for (j = 1; j < p + 1; j++) {
538: x = glldata->zGL[j];
539: z0 = 1.0;
540: z1 = x;
541: for (n = 1; n < p; n++) {
542: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543: z0 = z1;
544: z1 = z2;
545: }
546: Lpj = z2;
547: glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548: glldata->A[0][j] = glldata->A[j][0];
549: }
550: for (j = 0; j < p; j++) {
551: x = glldata->zGL[j];
552: z0 = 1.0;
553: z1 = x;
554: for (n = 1; n < p; n++) {
555: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556: z0 = z1;
557: z1 = z2;
558: }
559: Lpj = z2;
561: glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562: glldata->A[j][p] = glldata->A[p][j];
563: }
564: glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565: glldata->A[p][p] = glldata->A[0][0];
567: /* compute element matrix */
568: xloc = p + 1;
569: yloc = p + 1;
570: zloc = p + 1;
571: if (dd.dim < 2) yloc = 1;
572: if (dd.dim < 3) zloc = 1;
573: xyloc = xloc * yloc;
574: xyzloc = xloc * yloc * zloc;
576: PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577: PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578: PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579: PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580: PetscCall(MatZeroEntries(glldata->elem_mat));
581: PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
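/* the spectral element stiffness matrix of the Laplacian is assembled as a sum of tensor products:
   the 1D stiffness matrix A acts along one direction at a time, scaled by the diagonal GLL
   quadrature weights in the remaining directions */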
583: for (k = 0; k < zloc; k++) {
584: if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585: else rhoGLk = 1.0;
587: for (j = 0; j < yloc; j++) {
588: if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589: else rhoGLj = 1.0;
591: for (i = 0; i < xloc; i++) {
592: ii = k * xyloc + j * xloc + i;
593: s = k;
594: r = j;
595: for (q = 0; q < xloc; q++) {
596: jj = s * xyloc + r * xloc + q;
597: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598: }
599: if (dd.dim > 1) {
600: s = k;
601: q = i;
602: for (r = 0; r < yloc; r++) {
603: jj = s * xyloc + r * xloc + q;
604: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605: }
606: }
607: if (dd.dim > 2) {
608: r = j;
609: q = i;
610: for (s = 0; s < zloc; s++) {
611: jj = s * xyloc + r * xloc + q;
612: PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613: }
614: }
615: }
616: }
617: }
618: PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619: PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620: #if DEBUG
621: {
622: Vec lvec, rvec;
623: PetscReal norm;
624: PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625: PetscCall(VecSet(lvec, 1.0));
626: PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628: PetscCall(VecDestroy(&lvec));
629: PetscCall(VecDestroy(&rvec));
630: }
631: #endif
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637: PetscMPIInt rank;
638: PetscInt i, j, k;
640: PetscFunctionBeginUser;
641: /* Subdomain index in cartesian coordinates */
642: PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
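/* ranks are mapped to subdomains lexicographically: x direction fastest, then y, then z */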
643: dd->ipx = rank % dd->npx;
644: if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645: else dd->ipz = 0;
647: dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648: /* number of local elements */
649: dd->nex_l = dd->nex / dd->npx;
650: if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651: if (dd->dim > 1) {
652: dd->ney_l = dd->ney / dd->npy;
653: if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654: } else {
655: dd->ney_l = 0;
656: }
657: if (dd->dim > 2) {
658: dd->nez_l = dd->nez / dd->npz;
659: if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660: } else {
661: dd->nez_l = 0;
662: }
663: /* local and global number of dofs */
664: dd->xm_l = dd->nex_l * dd->p + 1;
665: dd->xm = dd->nex * dd->p + 1;
666: dd->ym_l = dd->ney_l * dd->p + 1;
667: dd->ym = dd->ney * dd->p + 1;
668: dd->zm_l = dd->nez_l * dd->p + 1;
669: dd->zm = dd->nez * dd->p + 1;
670: if (!dd->pure_neumann) {
671: if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672: if (!dd->DBC_zerorows) dd->xm--;
673: }
675: /* starting global index for local dofs (simple lexicographic order) */
676: dd->startx = 0;
677: j = dd->nex / dd->npx;
678: for (i = 0; i < dd->ipx; i++) {
679: k = j;
680: if (i < dd->nex % dd->npx) k++;
681: dd->startx = dd->startx + k * dd->p;
682: }
683: if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
685: dd->starty = 0;
686: if (dd->dim > 1) {
687: j = dd->ney / dd->npy;
688: for (i = 0; i < dd->ipy; i++) {
689: k = j;
690: if (i < dd->ney % dd->npy) k++;
691: dd->starty = dd->starty + k * dd->p;
692: }
693: }
694: dd->startz = 0;
695: if (dd->dim > 2) {
696: j = dd->nez / dd->npz;
697: for (i = 0; i < dd->ipz; i++) {
698: k = j;
699: if (i < dd->nez % dd->npz) k++;
700: dd->startz = dd->startz + k * dd->p;
701: }
702: }
703: PetscFunctionReturn(PETSC_SUCCESS);
704: }
706: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
707: {
708: GLLData gll;
709: Mat local_mat = 0, temp_A = 0;
710: ISLocalToGlobalMapping matis_map = 0;
711: IS dirichletIS = 0;
713: PetscFunctionBeginUser;
714: /* Compute nodes, weights and auxiliary matrices for the Gauss-Lobatto-Legendre quadrature rule */
715: PetscCall(GLLStuffs(dd, &gll));
716: /* Compute matrix of subdomain Neumann problem */
717: PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
718: /* Compute global mapping of local dofs */
719: PetscCall(ComputeMapping(dd, &matis_map));
720: /* Create MATIS object needed by BDDC */
721: PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
722: /* Set local subdomain matrices into MATIS object */
723: PetscCall(MatScale(local_mat, dd.scalingfactor));
724: PetscCall(MatISSetLocalMat(temp_A, local_mat));
725: /* Call assembly functions */
726: PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
727: PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));
729: if (dd.DBC_zerorows) {
730: PetscInt dirsize;
732: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
733: PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
734: PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
735: PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
736: PetscCall(ISDestroy(&dirichletIS));
737: }
739: /* giving hints (e.g. SPD) about the local matrices can be useful to BDDC */
740: PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
741: PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
742: #if DEBUG
743: {
744: Vec lvec, rvec;
745: PetscReal norm;
746: PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
747: PetscCall(VecSet(lvec, 1.0));
748: PetscCall(MatMult(temp_A, lvec, rvec));
749: PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
750: PetscCall(VecDestroy(&lvec));
751: PetscCall(VecDestroy(&rvec));
752: }
753: #endif
754: /* free allocated workspace */
755: PetscCall(PetscFree(gll.zGL));
756: PetscCall(PetscFree(gll.rhoGL));
757: PetscCall(PetscFree(gll.A[0]));
758: PetscCall(PetscFree(gll.A));
759: PetscCall(MatDestroy(&gll.elem_mat));
760: PetscCall(MatDestroy(&local_mat));
761: PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
762: /* give back the pointer to the MATIS object */
763: *A = temp_A;
764: PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
768: {
769: KSP temp_ksp;
770: PC pc;
772: PetscFunctionBeginUser;
773: PetscCall(KSPGetPC(ksp_bddc, &pc));
774: if (!dd.testkspfetidp) {
775: PC D;
776: Mat F;
778: PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
779: PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
780: PetscCall(KSPSetOperators(temp_ksp, F, F));
781: PetscCall(KSPSetType(temp_ksp, KSPCG));
782: PetscCall(KSPSetPC(temp_ksp, D));
783: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
784: PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
785: PetscCall(KSPSetFromOptions(temp_ksp));
786: PetscCall(KSPSetUp(temp_ksp));
787: PetscCall(MatDestroy(&F));
788: PetscCall(PCDestroy(&D));
789: } else {
790: Mat A, Ap;
792: PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
793: PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
794: PetscCall(KSPSetOperators(temp_ksp, A, Ap));
795: PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
796: PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
797: PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
798: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
799: PetscCall(KSPSetFromOptions(temp_ksp));
800: PetscCall(KSPSetUp(temp_ksp));
801: }
802: *ksp_fetidp = temp_ksp;
803: PetscFunctionReturn(PETSC_SUCCESS);
804: }
806: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
807: {
808: KSP temp_ksp;
809: PC pc;
810: IS primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
811: PetscInt vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
812: MatNullSpace near_null_space;
814: PetscFunctionBeginUser;
815: PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
816: PetscCall(KSPSetOperators(temp_ksp, A, A));
817: PetscCall(KSPSetType(temp_ksp, KSPCG));
818: PetscCall(KSPGetPC(temp_ksp, &pc));
819: PetscCall(PCSetType(pc, PCBDDC));
821: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
823: /* BDDC customization */
825: /* jumping coefficients case */
826: PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));
828: /* Dofs splitting
829: Simple stride-1 IS
830: It is not needed since, by default, PCBDDC assumes a stride-1 split */
831: PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
832: #if 1
833: PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
834: PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
835: #else
836: /* examples for global ordering */
838: /* each process lists the nodes it owns */
839: PetscInt sr, er;
840: PetscCall(MatGetOwnershipRange(A, &sr, &er));
841: PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
842: PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
843: /* Split can be passed in a more general way since any process can list any node */
844: #endif
845: PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
846: PetscCall(PetscFree(bddc_dofs_splitting));
848: /* Primal constraints implemented by using a near null space attached to A -> here only the constants are passed in
849: (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
850: #if 0
851: Vec vecs[2];
852: PetscRandom rctx;
853: PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
854: PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
855: PetscCall(VecSetRandom(vecs[0],rctx));
856: PetscCall(VecSetRandom(vecs[1],rctx));
857: PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
858: PetscCall(VecDestroy(&vecs[0]));
859: PetscCall(VecDestroy(&vecs[1]));
860: PetscCall(PetscRandomDestroy(&rctx));
861: #else
862: PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
863: #endif
864: PetscCall(MatSetNearNullSpace(A, near_null_space));
865: PetscCall(MatNullSpaceDestroy(&near_null_space));
867: /* CSR graph of subdomain dofs */
868: PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
869: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));
871: /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
872: vidx[0] = 0 * dd.xm_l + 0;
873: vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
874: vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
875: vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
876: vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
877: vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
878: vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
879: vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
880: PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
881: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
882: PetscCall(ISDestroy(&primals));
884: /* Neumann/Dirichlet indices on the global boundary */
885: if (dd.DBC_zerorows) {
886: /* Only if you eliminate some rows of the matrix with a zerorows function do you need to set the Dirichlet indices into the PCBDDC data */
887: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
888: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
889: PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
890: } else {
891: if (dd.pure_neumann) {
892: /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
893: PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
894: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
895: } else {
896: /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
897: /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
898: PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
899: PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
900: }
901: }
903: /* Pass local null space information to local matrices (needed when using approximate local solvers) */
904: if (dd.ipx || dd.pure_neumann) {
905: MatNullSpace nsp;
906: Mat local_mat;
908: PetscCall(MatISGetLocalMat(A, &local_mat));
909: PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
910: PetscCall(MatSetNullSpace(local_mat, nsp));
911: PetscCall(MatNullSpaceDestroy(&nsp));
912: }
913: PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
914: PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
915: PetscCall(KSPSetFromOptions(temp_ksp));
916: PetscCall(KSPSetUp(temp_ksp));
917: *ksp = temp_ksp;
918: PetscCall(ISDestroy(&dirichletIS));
919: PetscCall(ISDestroy(&neumannIS));
920: PetscFunctionReturn(PETSC_SUCCESS);
921: }
923: static PetscErrorCode InitializeDomainData(DomainData *dd)
924: {
925: PetscMPIInt sizes, rank;
926: PetscInt factor;
928: PetscFunctionBeginUser;
929: dd->gcomm = PETSC_COMM_WORLD;
930: PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
931: PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
932: /* Get information from command line */
933: /* Processors/subdomains per dimension */
934: /* Default is 1d problem */
935: dd->npx = sizes;
936: dd->npy = 0;
937: dd->npz = 0;
938: dd->dim = 1;
939: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
940: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
941: PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
942: if (dd->npy) dd->dim++;
943: if (dd->npz) dd->dim++;
944: /* Number of elements per dimension */
945: /* Default is one element per subdomain */
946: dd->nex = dd->npx;
947: dd->ney = dd->npy;
948: dd->nez = dd->npz;
949: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
950: PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
951: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
952: if (!dd->npy) {
953: dd->ney = 0;
954: dd->nez = 0;
955: }
956: if (!dd->npz) dd->nez = 0;
957: /* Spectral degree */
958: dd->p = 3;
959: PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
960: /* pure neumann problem? */
961: dd->pure_neumann = PETSC_FALSE;
962: PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));
964: /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
965: dd->DBC_zerorows = PETSC_FALSE;
967: PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
968: if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
969: dd->scalingfactor = 1.0;
971: factor = 0;
972: PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
973: /* checkerboard coefficient pattern: 10^factor on even ranks, 10^(-factor) on odd ranks */
974: dd->scalingfactor = PetscPowScalar(10.0, factor * PetscPowInt(-1, rank));
975: /* check the data passed in */
976: if (dd->dim == 1) {
977: PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
978: PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
979: } else if (dd->dim == 2) {
980: PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
981: PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
982: } else {
983: PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
984: PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
985: }
986: PetscFunctionReturn(PETSC_SUCCESS);
987: }
989: int main(int argc, char **args)
990: {
991: DomainData dd;
992: PetscReal norm, maxeig, mineig;
993: PetscScalar scalar_value;
994: PetscInt ndofs, its;
995: Mat A = NULL, F = NULL;
996: KSP KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
997: KSPConvergedReason reason;
998: Vec exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
999: PetscBool testfetidp = PETSC_TRUE;
1001: /* Init PETSc */
1002: PetscFunctionBeginUser;
1003: PetscCall(PetscInitialize(&argc, &args, NULL, help));
1004: /* Initialize DomainData */
1005: PetscCall(InitializeDomainData(&dd));
1006: /* Decompose domain */
1007: PetscCall(DomainDecomposition(&dd));
1008: #if DEBUG
1009: printf("Subdomain data\n");
1010: printf("IPS : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1011: printf("NEG : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1012: printf("NEL : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1013: printf("LDO : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1014: printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1015: printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1016: #endif
1017: dd.testkspfetidp = PETSC_TRUE;
1018: PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1019: PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1020: /* assemble global matrix */
1021: PetscCall(ComputeMatrix(dd, &A));
1022: /* get work vectors */
1023: PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1024: PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1025: PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1026: /* create and customize KSP/PC for BDDC */
1027: PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1028: /* create KSP/PC for FETIDP */
1029: if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1030: /* create random exact solution */
1031: #if defined(PETSC_USE_COMPLEX)
1032: PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1033: #else
1034: PetscCall(VecSetRandom(exact_solution, NULL));
1035: #endif
1036: PetscCall(VecShift(exact_solution, -0.5));
1037: PetscCall(VecScale(exact_solution, 100.0));
1038: PetscCall(VecGetSize(exact_solution, &ndofs));
1039: if (dd.pure_neumann) {
1040: PetscCall(VecSum(exact_solution, &scalar_value));
1041: scalar_value = -scalar_value / (PetscScalar)ndofs;
1042: PetscCall(VecShift(exact_solution, scalar_value));
1043: }
1044: /* assemble BDDC rhs */
1045: PetscCall(MatMult(A, exact_solution, bddc_rhs));
1046: /* test ksp with BDDC */
1047: PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1048: PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1049: PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1050: PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1051: if (dd.pure_neumann) {
1052: PetscCall(VecSum(bddc_solution, &scalar_value));
1053: scalar_value = -scalar_value / (PetscScalar)ndofs;
1054: PetscCall(VecShift(bddc_solution, scalar_value));
1055: }
1056: /* check exact_solution and BDDC solution */
1057: PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1058: PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1059: PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1060: PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1061: if (reason < 0) {
1062: PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1063: PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1064: }
1065: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1066: PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1067: if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1068: PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1069: if (testfetidp) {
1070: Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;
1072: PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1073: if (!dd.testkspfetidp) {
1074: /* assemble fetidp rhs on the space of Lagrange multipliers */
1075: PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1076: PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1077: PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1078: PetscCall(VecSet(fetidp_solution, 0.0));
1079: /* test ksp with FETIDP */
1080: PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1081: /* assemble fetidp solution on physical domain */
1082: PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1083: } else {
1084: KSP kspF;
1085: PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1086: PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1087: PetscCall(KSPGetOperators(kspF, &F, NULL));
1088: }
1089: PetscCall(MatGetSize(F, &ndofs, NULL));
1090: PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1091: PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1092: PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1093: /* check FETIDP sol */
1094: if (dd.pure_neumann) {
1095: PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1096: scalar_value = -scalar_value / (PetscScalar)ndofs;
1097: PetscCall(VecShift(fetidp_solution_all, scalar_value));
1098: }
1099: PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1100: PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1101: PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1102: PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1103: if (reason < 0) {
1104: PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1105: PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1106: }
1107: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1108: PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1109: if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1110: PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1111: PetscCall(VecDestroy(&fetidp_solution));
1112: PetscCall(VecDestroy(&fetidp_solution_all));
1113: PetscCall(VecDestroy(&fetidp_rhs));
1114: }
1115: PetscCall(KSPDestroy(&KSPwithFETIDP));
1116: PetscCall(VecDestroy(&exact_solution));
1117: PetscCall(VecDestroy(&bddc_solution));
1118: PetscCall(VecDestroy(&bddc_rhs));
1119: PetscCall(MatDestroy(&A));
1120: PetscCall(KSPDestroy(&KSPwithBDDC));
1121: /* Quit PETSc */
1122: PetscCall(PetscFinalize());
1123: return 0;
1124: }
1126: /*TEST
1128: testset:
1129: nsize: 4
1130: args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1131: output_file: output/ex59_bddc_fetidp_1.out
1132: test:
1133: suffix: bddc_fetidp_1
1134: test:
1135: requires: viennacl
1136: suffix: bddc_fetidp_1_viennacl
1137: args: -subdomain_mat_type aijviennacl
1138: test:
1139: requires: cuda
1140: suffix: bddc_fetidp_1_cuda
1141: args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1143: testset:
1144: nsize: 4
1145: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1146: requires: !single
1147: test:
1148: suffix: bddc_fetidp_2
1149: test:
1150: suffix: bddc_fetidp_3
1151: args: -npz 1 -nez 1
1152: test:
1153: suffix: bddc_fetidp_4
1154: args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1156: testset:
1157: nsize: 8
1158: suffix: bddc_fetidp_approximate
1159: args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1161: testset:
1162: nsize: 4
1163: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1164: filter: grep -v "variant HERMITIAN"
1165: requires: !single
1166: test:
1167: suffix: bddc_fetidp_ml_1
1168: args: -physical_pc_bddc_coarsening_ratio 1
1169: test:
1170: suffix: bddc_fetidp_ml_2
1171: args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1172: test:
1173: suffix: bddc_fetidp_ml_3
1174: args: -physical_pc_bddc_coarsening_ratio 4
1176: testset:
1177: nsize: 9
1178: args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1179: output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1180: test:
1181: suffix: bddc_fetidp_ml_eqlimit_1
1182: args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1183: test:
1184: suffix: bddc_fetidp_ml_eqlimit_2
1185: args: -physical_pc_bddc_coarse_eqs_limit 46
1187: TEST*/