Actual source code: ex59.c
1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2: Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
3: Spectral degree can be specified by passing values to -p option.\n\
4: Global problem has either Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
6: Example usage: \n\
7: 1D: mpiexec -n 4 ex59 -nex 7\n\
8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10: Subdomain decomposition can be specified with -np_ parameters.\n\
11: Dirichlet boundaries on one side by default:\n\
12: it does not iterate on Dirichlet nodes by default; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13: Pure Neumann case can be requested by passing in -pureneumann.\n\
14: In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
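/* A minimal sketch of a pure Neumann run combining the options described above (assuming the executable
   is named ex59 and 4 MPI ranks are available):
     mpiexec -n 4 ./ex59 -npx 2 -npy 2 -nex 4 -ney 4 -pureneumann -coarse_redundant_pc_factor_zeropivot 0 */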
16: #include <petscksp.h>
17: #include <petscpc.h>
18: #include <petscdm.h>
19: #include <petscdmda.h>
20: #include <petscblaslapack.h>
21: #define DEBUG 0
23: /* structure holding domain data */
24: typedef struct {
25: /* communicator */
26: MPI_Comm gcomm;
27: /* space dimension */
28: PetscInt dim;
29: /* spectral degree */
30: PetscInt p;
31: /* subdomains per dimension */
32: PetscInt npx,npy,npz;
33: /* subdomain index in cartesian dimensions */
34: PetscInt ipx,ipy,ipz;
35: /* elements per dimension */
36: PetscInt nex,ney,nez;
37: /* local elements per dimension */
38: PetscInt nex_l,ney_l,nez_l;
39: /* global number of dofs per dimension */
40: PetscInt xm,ym,zm;
41: /* local number of dofs per dimension */
42: PetscInt xm_l,ym_l,zm_l;
43: /* starting global indexes for subdomain in lexicographic ordering */
44: PetscInt startx,starty,startz;
45: /* pure Neumann problem */
46: PetscBool pure_neumann;
47: /* Dirichlet BC implementation */
48: PetscBool DBC_zerorows;
49: /* Scaling factor for subdomain */
50: PetscScalar scalingfactor;
51: PetscBool testkspfetidp;
52: } DomainData;
54: /* structure holding GLL data */
55: typedef struct {
56: /* GLL nodes */
57: PetscReal *zGL;
58: /* GLL weights */
59: PetscScalar *rhoGL;
60: /* aux_mat */
61: PetscScalar **A;
62: /* Element matrix */
63: Mat elem_mat;
64: } GLLData;
66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67: {
68: PetscInt *xadj_temp,*adjncy_temp;
69: PetscInt i,j,k,ii,jj,kk,iindex,count_adj;
70: PetscInt istart_csr,iend_csr,jstart_csr,jend_csr,kstart_csr,kend_csr;
71: PetscBool internal_node;
73: /* first count dimension of adjncy */
75: count_adj=0;
76: for (k=0; k<dd.zm_l; k++) {
77: internal_node = PETSC_TRUE;
78: kstart_csr = k>0 ? k-1 : k;
79: kend_csr = k<dd.zm_l-1 ? k+2 : k+1;
80: if (k == 0 || k == dd.zm_l-1) {
81: internal_node = PETSC_FALSE;
82: kstart_csr = k;
83: kend_csr = k+1;
84: }
85: for (j=0; j<dd.ym_l; j++) {
86: jstart_csr = j>0 ? j-1 : j;
87: jend_csr = j<dd.ym_l-1 ? j+2 : j+1;
88: if (j == 0 || j == dd.ym_l-1) {
89: internal_node = PETSC_FALSE;
90: jstart_csr = j;
91: jend_csr = j+1;
92: }
93: for (i=0; i<dd.xm_l; i++) {
94: istart_csr = i>0 ? i-1 : i;
95: iend_csr = i<dd.xm_l-1 ? i+2 : i+1;
96: if (i == 0 || i == dd.xm_l-1) {
97: internal_node = PETSC_FALSE;
98: istart_csr = i;
99: iend_csr = i+1;
100: }
101: if (internal_node) {
102: istart_csr = i;
103: iend_csr = i+1;
104: jstart_csr = j;
105: jend_csr = j+1;
106: kstart_csr = k;
107: kend_csr = k+1;
108: }
109: for (kk=kstart_csr;kk<kend_csr;kk++) {
110: for (jj=jstart_csr;jj<jend_csr;jj++) {
111: for (ii=istart_csr;ii<iend_csr;ii++) {
112: count_adj=count_adj+1;
113: }
114: }
115: }
116: }
117: }
118: }
119: PetscMalloc1(dd.xm_l*dd.ym_l*dd.zm_l+1,&xadj_temp);
120: PetscMalloc1(count_adj,&adjncy_temp);
122: /* now fill CSR data structure */
123: count_adj=0;
124: for (k=0; k<dd.zm_l; k++) {
125: internal_node = PETSC_TRUE;
126: kstart_csr = k>0 ? k-1 : k;
127: kend_csr = k<dd.zm_l-1 ? k+2 : k+1;
128: if (k == 0 || k == dd.zm_l-1) {
129: internal_node = PETSC_FALSE;
130: kstart_csr = k;
131: kend_csr = k+1;
132: }
133: for (j=0; j<dd.ym_l; j++) {
134: jstart_csr = j>0 ? j-1 : j;
135: jend_csr = j<dd.ym_l-1 ? j+2 : j+1;
136: if (j == 0 || j == dd.ym_l-1) {
137: internal_node = PETSC_FALSE;
138: jstart_csr = j;
139: jend_csr = j+1;
140: }
141: for (i=0; i<dd.xm_l; i++) {
142: istart_csr = i>0 ? i-1 : i;
143: iend_csr = i<dd.xm_l-1 ? i+2 : i+1;
144: if (i == 0 || i == dd.xm_l-1) {
145: internal_node = PETSC_FALSE;
146: istart_csr = i;
147: iend_csr = i+1;
148: }
149: iindex = k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
150: xadj_temp[iindex] = count_adj;
151: if (internal_node) {
152: istart_csr = i;
153: iend_csr = i+1;
154: jstart_csr = j;
155: jend_csr = j+1;
156: kstart_csr = k;
157: kend_csr = k+1;
158: }
159: for (kk=kstart_csr; kk<kend_csr; kk++) {
160: for (jj=jstart_csr; jj<jend_csr; jj++) {
161: for (ii=istart_csr; ii<iend_csr; ii++) {
162: iindex = kk*dd.xm_l*dd.ym_l+jj*dd.xm_l+ii;
164: adjncy_temp[count_adj] = iindex;
165: count_adj = count_adj+1;
166: }
167: }
168: }
169: }
170: }
171: }
172: xadj_temp[dd.xm_l*dd.ym_l*dd.zm_l] = count_adj;
174: *xadj = xadj_temp;
175: *adjncy = adjncy_temp;
176: return 0;
177: }
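/* A note on the CSR layout produced above (standard compressed sparse row adjacency): xadj has one entry per
   local dof plus a final sentinel, and the neighbours of dof i are adjncy[xadj[i]] .. adjncy[xadj[i+1]-1].
   For example, tracing the loops for a 1D local grid with xm_l=3 (ym_l=zm_l=1) gives
     xadj   = {0, 1, 4, 5}
     adjncy = {0,  0, 1, 2,  2}
   i.e. the two local boundary dofs list only themselves, while the middle dof lists all three. */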
179: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann)
180: {
181: IS temp_dirichlet=0,temp_neumann=0;
182: PetscInt localsize,i,j,k,*indices;
183: PetscBool *touched;
186: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
188: PetscMalloc1(localsize,&indices);
189: PetscMalloc1(localsize,&touched);
190: for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;
192: if (dirichlet) {
193: i = 0;
194: /* west boundary */
195: if (dd.ipx == 0) {
196: for (k=0;k<dd.zm_l;k++) {
197: for (j=0;j<dd.ym_l;j++) {
198: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
199: touched[indices[i]]=PETSC_TRUE;
200: i++;
201: }
202: }
203: }
204: ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);
205: }
206: if (neumann) {
207: i = 0;
208: /* west boundary */
209: if (dd.ipx == 0) {
210: for (k=0;k<dd.zm_l;k++) {
211: for (j=0;j<dd.ym_l;j++) {
212: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
213: if (!touched[indices[i]]) {
214: touched[indices[i]]=PETSC_TRUE;
215: i++;
216: }
217: }
218: }
219: }
220: /* east boundary */
221: if (dd.ipx == dd.npx-1) {
222: for (k=0;k<dd.zm_l;k++) {
223: for (j=0;j<dd.ym_l;j++) {
224: indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
225: if (!touched[indices[i]]) {
226: touched[indices[i]]=PETSC_TRUE;
227: i++;
228: }
229: }
230: }
231: }
232: /* south boundary */
233: if (dd.ipy == 0 && dd.dim > 1) {
234: for (k=0;k<dd.zm_l;k++) {
235: for (j=0;j<dd.xm_l;j++) {
236: indices[i]=k*dd.ym_l*dd.xm_l+j;
237: if (!touched[indices[i]]) {
238: touched[indices[i]]=PETSC_TRUE;
239: i++;
240: }
241: }
242: }
243: }
244: /* north boundary */
245: if (dd.ipy == dd.npy-1 && dd.dim > 1) {
246: for (k=0;k<dd.zm_l;k++) {
247: for (j=0;j<dd.xm_l;j++) {
248: indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
249: if (!touched[indices[i]]) {
250: touched[indices[i]]=PETSC_TRUE;
251: i++;
252: }
253: }
254: }
255: }
256: /* bottom boundary */
257: if (dd.ipz == 0 && dd.dim > 2) {
258: for (k=0;k<dd.ym_l;k++) {
259: for (j=0;j<dd.xm_l;j++) {
260: indices[i]=k*dd.xm_l+j;
261: if (!touched[indices[i]]) {
262: touched[indices[i]]=PETSC_TRUE;
263: i++;
264: }
265: }
266: }
267: }
268: /* top boundary */
269: if (dd.ipz == dd.npz-1 && dd.dim > 2) {
270: for (k=0;k<dd.ym_l;k++) {
271: for (j=0;j<dd.xm_l;j++) {
272: indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
273: if (!touched[indices[i]]) {
274: touched[indices[i]]=PETSC_TRUE;
275: i++;
276: }
277: }
278: }
279: }
280: ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);
281: }
282: if (dirichlet) *dirichlet = temp_dirichlet;
283: if (neumann) *neumann = temp_neumann;
284: PetscFree(indices);
285: PetscFree(touched);
286: return 0;
287: }
289: static PetscErrorCode ComputeMapping(DomainData dd,ISLocalToGlobalMapping *isg2lmap)
290: {
291: DM da;
292: AO ao;
293: DMBoundaryType bx = DM_BOUNDARY_NONE,by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
294: DMDAStencilType stype = DMDA_STENCIL_BOX;
295: ISLocalToGlobalMapping temp_isg2lmap;
296: PetscInt i,j,k,ig,jg,kg,lindex,gindex,localsize;
297: PetscInt *global_indices;
300: /* Not an efficient mapping: this function computes a very simple lexicographic mapping
301: just to illustrate the creation of a MATIS object */
302: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
303: PetscMalloc1(localsize,&global_indices);
304: for (k=0; k<dd.zm_l; k++) {
305: kg=dd.startz+k;
306: for (j=0; j<dd.ym_l; j++) {
307: jg=dd.starty+j;
308: for (i=0; i<dd.xm_l; i++) {
309: ig =dd.startx+i;
310: lindex =k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
311: gindex =kg*dd.xm*dd.ym+jg*dd.xm+ig;
312: global_indices[lindex]=gindex;
313: }
314: }
315: }
316: if (dd.dim==3) {
317: DMDACreate3d(dd.gcomm,bx,by,bz,stype,dd.xm,dd.ym,dd.zm,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&da);
318: } else if (dd.dim==2) {
319: DMDACreate2d(dd.gcomm,bx,by,stype,dd.xm,dd.ym,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);
320: } else {
321: DMDACreate1d(dd.gcomm,bx,dd.xm,1,1,NULL,&da);
322: }
323: DMSetFromOptions(da);
324: DMSetUp(da);
325: DMDASetAOType(da,AOMEMORYSCALABLE);
326: DMDAGetAO(da,&ao);
327: AOApplicationToPetsc(ao,dd.xm_l*dd.ym_l*dd.zm_l,global_indices);
328: ISLocalToGlobalMappingCreate(dd.gcomm,1,localsize,global_indices,PETSC_OWN_POINTER,&temp_isg2lmap);
329: DMDestroy(&da);
330: *isg2lmap = temp_isg2lmap;
331: return 0;
332: }
333: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
334: {
335: PetscInt localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
336: PetscInt ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
337: PetscInt *indexg,*cols,*colsg;
338: PetscScalar *vals;
339: Mat temp_local_mat,elem_mat_DBC=0,*usedmat;
340: IS submatIS;
343: MatGetSize(glldata.elem_mat,&i,&j);
344: PetscMalloc1(i,&indexg);
345: PetscMalloc1(i,&colsg);
346: /* get submatrix of elem_mat without Dirichlet nodes */
347: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
348: xloc = dd.p+1;
349: yloc = 1;
350: zloc = 1;
351: if (dd.dim>1) yloc = dd.p+1;
352: if (dd.dim>2) zloc = dd.p+1;
353: ii = 0;
354: for (k=0;k<zloc;k++) {
355: for (j=0;j<yloc;j++) {
356: for (i=1;i<xloc;i++) {
357: indexg[ii]=k*xloc*yloc+j*xloc+i;
358: ii++;
359: }
360: }
361: }
362: ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);
363: MatCreateSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);
364: ISDestroy(&submatIS);
365: }
367: /* Assemble subdomain matrix */
368: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
369: MatCreate(PETSC_COMM_SELF,&temp_local_mat);
370: MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);
371: MatSetOptionsPrefix(temp_local_mat,"subdomain_");
372: /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
373: /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
374: if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
375: MatSetType(temp_local_mat,MATSEQAIJ);
376: } else {
377: MatSetType(temp_local_mat,MATSEQSBAIJ);
378: }
379: MatSetFromOptions(temp_local_mat);
381: i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);
383: MatSeqAIJSetPreallocation(temp_local_mat,i,NULL); /* very overestimated */
384: MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL); /* very overestimated */
385: MatSeqBAIJSetPreallocation(temp_local_mat,1,i,NULL); /* very overestimated */
386: MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
388: yloc = dd.p+1;
389: zloc = dd.p+1;
390: if (dd.dim < 3) zloc = 1;
391: if (dd.dim < 2) yloc = 1;
393: auxnez = dd.nez_l;
394: auxney = dd.ney_l;
395: auxnex = dd.nex_l;
396: if (dd.dim < 3) auxnez = 1;
397: if (dd.dim < 2) auxney = 1;
399: for (ke=0; ke<auxnez; ke++) {
400: for (je=0; je<auxney; je++) {
401: for (ie=0; ie<auxnex; ie++) {
402: /* customize element accounting for BC */
403: xloc = dd.p+1;
404: ming = 0;
405: usedmat = &glldata.elem_mat;
406: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
407: if (ie == 0) {
408: xloc = dd.p;
409: usedmat = &elem_mat_DBC;
410: } else {
411: ming = -1;
412: usedmat = &glldata.elem_mat;
413: }
414: }
415: /* local to the element/global to the subdomain indexing */
416: for (k=0; k<zloc; k++) {
417: kg = ke*dd.p+k;
418: for (j=0; j<yloc; j++) {
419: jg = je*dd.p+j;
420: for (i=0; i<xloc; i++) {
421: ig = ie*dd.p+i+ming;
422: ii = k*xloc*yloc+j*xloc+i;
423: indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
424: }
425: }
426: }
427: /* Set values */
428: for (i=0; i<xloc*yloc*zloc; i++) {
429: MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
430: for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
431: MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);
432: MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
433: }
434: }
435: }
436: }
437: PetscFree(indexg);
438: PetscFree(colsg);
439: MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);
440: MatAssemblyEnd (temp_local_mat,MAT_FINAL_ASSEMBLY);
441: #if DEBUG
442: {
443: Vec lvec,rvec;
444: PetscReal norm;
445: MatCreateVecs(temp_local_mat,&lvec,&rvec);
446: VecSet(lvec,1.0);
447: MatMult(temp_local_mat,lvec,rvec);
448: VecNorm(rvec,NORM_INFINITY,&norm);
449: VecDestroy(&lvec);
450: VecDestroy(&rvec);
451: }
452: #endif
453: *local_mat = temp_local_mat;
454: MatDestroy(&elem_mat_DBC);
455: return 0;
456: }
458: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
459: {
460: PetscReal *M,si;
461: PetscScalar x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
462: PetscBLASInt pm1,lierr;
463: PetscInt i,j,n,k,s,r,q,ii,jj,p=dd.p;
464: PetscInt xloc,yloc,zloc,xyloc,xyzloc;
467: /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
468: PetscCalloc1(p+1,&glldata->zGL);
470: glldata->zGL[0]=-1.0;
471: glldata->zGL[p]= 1.0;
472: if (p > 1) {
473: if (p == 2) glldata->zGL[1]=0.0;
474: else {
475: PetscMalloc1(p-1,&M);
476: for (i=0; i<p-1; i++) {
477: si = (PetscReal)(i+1.0);
478: M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
479: }
480: pm1 = p-1;
481: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
482: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
484: PetscFPTrapPop();
485: PetscFree(M);
486: }
487: }
489: /* Weights for 1D quadrature */
490: PetscMalloc1(p+1,&glldata->rhoGL);
492: glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
493: glldata->rhoGL[p]=glldata->rhoGL[0];
494: z2 = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
495: for (i=1; i<p; i++) {
496: x = glldata->zGL[i];
497: z0 = 1.0;
498: z1 = x;
499: for (n=1; n<p; n++) {
500: z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
501: z0 = z1;
502: z1 = z2;
503: }
504: glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
505: }
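  /* Sanity reference (a known closed form, not computed here): for p=3 the GLL rule on [-1,1] has
     nodes {-1, -1/sqrt(5), 1/sqrt(5), 1} and weights {1/6, 5/6, 5/6, 1/6}, summing to 2, consistent
     with rhoGL[0]=rhoGL[p]=2/(p*(p+1)) and the interior formula 2/(p*(p+1)*Lp(x)^2) used above. */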
507: /* Auxiliary mat for laplacian */
508: PetscMalloc1(p+1,&glldata->A);
509: PetscMalloc1((p+1)*(p+1),&glldata->A[0]);
510: for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;
512: for (j=1; j<p; j++) {
513: x =glldata->zGL[j];
514: z0=1.0;
515: z1=x;
516: for (n=1; n<p; n++) {
517: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
518: z0=z1;
519: z1=z2;
520: }
521: Lpj=z2;
522: for (r=1; r<p; r++) {
523: if (r == j) {
524: glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
525: } else {
526: x = glldata->zGL[r];
527: z0 = 1.0;
528: z1 = x;
529: for (n=1; n<p; n++) {
530: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
531: z0=z1;
532: z1=z2;
533: }
534: Lpr = z2;
535: glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
536: }
537: }
538: }
539: for (j=1; j<p+1; j++) {
540: x = glldata->zGL[j];
541: z0 = 1.0;
542: z1 = x;
543: for (n=1; n<p; n++) {
544: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
545: z0=z1;
546: z1=z2;
547: }
548: Lpj = z2;
549: glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
550: glldata->A[0][j]=glldata->A[j][0];
551: }
552: for (j=0; j<p; j++) {
553: x = glldata->zGL[j];
554: z0 = 1.0;
555: z1 = x;
556: for (n=1; n<p; n++) {
557: z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
558: z0=z1;
559: z1=z2;
560: }
561: Lpj=z2;
563: glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
564: glldata->A[j][p]=glldata->A[p][j];
565: }
566: glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
567: glldata->A[p][p]=glldata->A[0][0];
569: /* compute element matrix */
570: xloc = p+1;
571: yloc = p+1;
572: zloc = p+1;
573: if (dd.dim<2) yloc=1;
574: if (dd.dim<3) zloc=1;
575: xyloc = xloc*yloc;
576: xyzloc = xloc*yloc*zloc;
578: MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);
579: MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);
580: MatSetType(glldata->elem_mat,MATSEQAIJ);
581: MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL); /* overestimated */
582: MatZeroEntries(glldata->elem_mat);
583: MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
585: for (k=0; k<zloc; k++) {
586: if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
587: else rhoGLk=1.0;
589: for (j=0; j<yloc; j++) {
590: if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
591: else rhoGLj=1.0;
593: for (i=0; i<xloc; i++) {
594: ii = k*xyloc+j*xloc+i;
595: s = k;
596: r = j;
597: for (q=0; q<xloc; q++) {
598: jj = s*xyloc+r*xloc+q;
599: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);
600: }
601: if (dd.dim>1) {
602: s=k;
603: q=i;
604: for (r=0; r<yloc; r++) {
605: jj = s*xyloc+r*xloc+q;
606: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);
607: }
608: }
609: if (dd.dim>2) {
610: r=j;
611: q=i;
612: for (s=0; s<zloc; s++) {
613: jj = s*xyloc+r*xloc+q;
614: MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);
615: }
616: }
617: }
618: }
619: }
620: MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);
621: MatAssemblyEnd (glldata->elem_mat,MAT_FINAL_ASSEMBLY);
622: #if DEBUG
623: {
624: Vec lvec,rvec;
625: PetscReal norm;
626: MatCreateVecs(glldata->elem_mat,&lvec,&rvec);
627: VecSet(lvec,1.0);
628: MatMult(glldata->elem_mat,lvec,rvec);
629: VecNorm(rvec,NORM_INFINITY,&norm);
630: VecDestroy(&lvec);
631: VecDestroy(&rvec);
632: }
633: #endif
634: return 0;
635: }
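/* The element matrix assembled in GLLStuffs has the usual tensor-product structure of a GLL spectral element
   Laplacian: the 1D stiffness matrix A acts along one direction while the other directions are scaled by the
   corresponding quadrature weights, i.e. (up to ordering of the factors, (x) denoting the Kronecker product)
     K_elem = A (x) W (x) W + W (x) A (x) W + W (x) W (x) A,   W = diag(rhoGL),
   with the y and z terms dropped in 2D and 1D respectively. */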
637: static PetscErrorCode DomainDecomposition(DomainData *dd)
638: {
639: PetscMPIInt rank;
640: PetscInt i,j,k;
643: /* Subdomain index in cartesian coordinates */
644: MPI_Comm_rank(dd->gcomm,&rank);
645: dd->ipx = rank%dd->npx;
646: if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
647: else dd->ipz = 0;
649: dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
650: /* number of local elements */
651: dd->nex_l = dd->nex/dd->npx;
652: if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
653: if (dd->dim>1) {
654: dd->ney_l = dd->ney/dd->npy;
655: if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
656: } else {
657: dd->ney_l = 0;
658: }
659: if (dd->dim>2) {
660: dd->nez_l = dd->nez/dd->npz;
661: if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
662: } else {
663: dd->nez_l = 0;
664: }
665: /* local and global number of dofs */
666: dd->xm_l = dd->nex_l*dd->p+1;
667: dd->xm = dd->nex*dd->p+1;
668: dd->ym_l = dd->ney_l*dd->p+1;
669: dd->ym = dd->ney*dd->p+1;
670: dd->zm_l = dd->nez_l*dd->p+1;
671: dd->zm = dd->nez*dd->p+1;
672: if (!dd->pure_neumann) {
673: if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
674: if (!dd->DBC_zerorows) dd->xm--;
675: }
677: /* starting global index for local dofs (simple lexicographic order) */
678: dd->startx = 0;
679: j = dd->nex/dd->npx;
680: for (i=0; i<dd->ipx; i++) {
681: k = j;
682: if (i<dd->nex%dd->npx) k++;
683: dd->startx = dd->startx+k*dd->p;
684: }
685: if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
687: dd->starty = 0;
688: if (dd->dim > 1) {
689: j = dd->ney/dd->npy;
690: for (i=0; i<dd->ipy; i++) {
691: k = j;
692: if (i<dd->ney%dd->npy) k++;
693: dd->starty = dd->starty+k*dd->p;
694: }
695: }
696: dd->startz = 0;
697: if (dd->dim > 2) {
698: j = dd->nez/dd->npz;
699: for (i=0; i<dd->ipz; i++) {
700: k = j;
701: if (i<dd->nez%dd->npz) k++;
702: dd->startz = dd->startz+k*dd->p;
703: }
704: }
705: return 0;
706: }
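/* A worked example of the decomposition arithmetic above, for the 1D run "mpiexec -n 4 ex59 -nex 7" with the
   default p=3: nex/npx=1 with remainder 3, so ranks 0-2 get nex_l=2 elements and rank 3 gets nex_l=1;
   globally xm=7*3+1=22 dofs, locally xm_l=7 (ranks 0-2) or 4 (rank 3). With the default Dirichlet treatment
   (no -usezerorows, no -pureneumann) the Dirichlet dof is dropped, so xm becomes 21 and rank 0's xm_l becomes 6
   (6+7+7+4 minus the 3 shared interface dofs = 21). */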
707: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
708: {
709: GLLData gll;
710: Mat local_mat =0,temp_A=0;
711: ISLocalToGlobalMapping matis_map =0;
712: IS dirichletIS=0;
715: /* Compute Gauss-Lobatto-Legendre quadrature data */
716: GLLStuffs(dd,&gll);
717: /* Compute matrix of subdomain Neumann problem */
718: ComputeSubdomainMatrix(dd,gll,&local_mat);
719: /* Compute global mapping of local dofs */
720: ComputeMapping(dd,&matis_map);
721: /* Create MATIS object needed by BDDC */
722: MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,matis_map,&temp_A);
723: /* Set local subdomain matrices into MATIS object */
724: MatScale(local_mat,dd.scalingfactor);
725: MatISSetLocalMat(temp_A,local_mat);
726: /* Call assembly functions */
727: MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);
728: MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);
730: if (dd.DBC_zerorows) {
731: PetscInt dirsize;
733: ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);
734: MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
735: MatZeroRowsColumnsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);
736: ISGetLocalSize(dirichletIS,&dirsize);
737: ISDestroy(&dirichletIS);
738: }
740: /* giving hints about the local and global matrices can be useful for BDDC */
741: MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);
742: MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);
743: #if DEBUG
744: {
745: Vec lvec,rvec;
746: PetscReal norm;
747: MatCreateVecs(temp_A,&lvec,&rvec);
748: VecSet(lvec,1.0);
749: MatMult(temp_A,lvec,rvec);
750: VecNorm(rvec,NORM_INFINITY,&norm);
751: VecDestroy(&lvec);
752: VecDestroy(&rvec);
753: }
754: #endif
755: /* free allocated workspace */
756: PetscFree(gll.zGL);
757: PetscFree(gll.rhoGL);
758: PetscFree(gll.A[0]);
759: PetscFree(gll.A);
760: MatDestroy(&gll.elem_mat);
761: MatDestroy(&local_mat);
762: ISLocalToGlobalMappingDestroy(&matis_map);
763: /* give back the pointer to the MATIS object */
764: *A = temp_A;
765: return 0;
766: }
768: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
769: {
770: KSP temp_ksp;
771: PC pc;
774: KSPGetPC(ksp_bddc,&pc);
775: if (!dd.testkspfetidp) {
776: PC D;
777: Mat F;
779: PCBDDCCreateFETIDPOperators(pc,PETSC_TRUE,NULL,&F,&D);
780: KSPCreate(PetscObjectComm((PetscObject)F),&temp_ksp);
781: KSPSetOperators(temp_ksp,F,F);
782: KSPSetType(temp_ksp,KSPCG);
783: KSPSetPC(temp_ksp,D);
784: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
785: KSPSetOptionsPrefix(temp_ksp,"fluxes_");
786: KSPSetFromOptions(temp_ksp);
787: KSPSetUp(temp_ksp);
788: MatDestroy(&F);
789: PCDestroy(&D);
790: } else {
791: Mat A,Ap;
793: KSPCreate(PetscObjectComm((PetscObject)ksp_bddc),&temp_ksp);
794: KSPGetOperators(ksp_bddc,&A,&Ap);
795: KSPSetOperators(temp_ksp,A,Ap);
796: KSPSetOptionsPrefix(temp_ksp,"fluxes_");
797: KSPSetType(temp_ksp,KSPFETIDP);
798: KSPFETIDPSetInnerBDDC(temp_ksp,pc);
799: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
800: KSPSetFromOptions(temp_ksp);
801: KSPSetUp(temp_ksp);
802: }
803: *ksp_fetidp = temp_ksp;
804: return 0;
805: }
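/* Two ways of building the FETI-DP solver are shown above: with -testkspfetidp 0 the fluxes operator F and its
   BDDC-based preconditioner D are created explicitly via PCBDDCCreateFETIDPOperators; otherwise the all-in-one
   KSPFETIDP solver is used, with the existing PCBDDC set as its inner BDDC. Both use the "fluxes_" options prefix. */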
807: static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp)
808: {
809: KSP temp_ksp;
810: PC pc;
811: IS primals,dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
812: PetscInt vidx[8],localsize,*xadj=NULL,*adjncy=NULL;
813: MatNullSpace near_null_space;
816: KSPCreate(dd.gcomm,&temp_ksp);
817: KSPSetOperators(temp_ksp,A,A);
818: KSPSetType(temp_ksp,KSPCG);
819: KSPGetPC(temp_ksp,&pc);
820: PCSetType(pc,PCBDDC);
822: localsize = dd.xm_l*dd.ym_l*dd.zm_l;
824: /* BDDC customization */
826: /* jumping coefficients case */
827: PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);
829: /* Dofs splitting
830: Simple stride-1 IS
831: It is not needed since, by default, PCBDDC assumes a stride-1 split */
832: PetscMalloc1(1,&bddc_dofs_splitting);
833: #if 1
834: ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);
835: PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);
836: #else
837: /* examples for global ordering */
839: /* each process lists the nodes it owns */
840: PetscInt sr,er;
841: MatGetOwnershipRange(A,&sr,&er);
842: ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);
843: PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);
844: /* Split can be passed in a more general way since any process can list any node */
845: #endif
846: ISDestroy(&bddc_dofs_splitting[0]);
847: PetscFree(bddc_dofs_splitting);
849: /* Primal constraints are implemented by attaching a near null space to A -> here only the constants are passed in
850: (in practice this is not needed since, by default, PCBDDC builds the primal space using the constants) */
851: #if 0
852: Vec vecs[2];
853: PetscRandom rctx;
854: MatCreateVecs(A,&vecs[0],&vecs[1]);
855: PetscRandomCreate(dd.gcomm,&rctx);
856: VecSetRandom(vecs[0],rctx);
857: VecSetRandom(vecs[1],rctx);
858: MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);
859: VecDestroy(&vecs[0]);
860: VecDestroy(&vecs[1]);
861: PetscRandomDestroy(&rctx);
862: #else
863: MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);
864: #endif
865: MatSetNearNullSpace(A,near_null_space);
866: MatNullSpaceDestroy(&near_null_space);
868: /* CSR graph of subdomain dofs */
869: BuildCSRGraph(dd,&xadj,&adjncy);
870: PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);
872: /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
873: vidx[0] = 0*dd.xm_l+0;
874: vidx[1] = 0*dd.xm_l+dd.xm_l-1;
875: vidx[2] = (dd.ym_l-1)*dd.xm_l+0;
876: vidx[3] = (dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
877: vidx[4] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+0;
878: vidx[5] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+dd.xm_l-1;
879: vidx[6] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+0;
880: vidx[7] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
881: ISCreateGeneral(dd.gcomm,8,vidx,PETSC_COPY_VALUES,&primals);
882: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
883: ISDestroy(&primals);
885: /* Neumann/Dirichlet indices on the global boundary */
886: if (dd.DBC_zerorows) {
887: /* Only when some rows of the matrix have been eliminated with the zero-rows function do you need to pass Dirichlet indices to PCBDDC */
888: ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
889: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
890: PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);
891: } else {
892: if (dd.pure_neumann) {
893: /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
894: ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);
895: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
896: } else {
897: /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
898: /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
899: ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
900: PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
901: }
902: }
904: /* Pass local null space information to local matrices (needed when using approximate local solvers) */
905: if (dd.ipx || dd.pure_neumann) {
906: MatNullSpace nsp;
907: Mat local_mat;
909: MatISGetLocalMat(A,&local_mat);
910: MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);
911: MatSetNullSpace(local_mat,nsp);
912: MatNullSpaceDestroy(&nsp);
913: }
914: KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
915: KSPSetOptionsPrefix(temp_ksp,"physical_");
916: KSPSetFromOptions(temp_ksp);
917: KSPSetUp(temp_ksp);
918: *ksp = temp_ksp;
919: ISDestroy(&dirichletIS);
920: ISDestroy(&neumannIS);
921: return 0;
922: }
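/* The BDDC solve uses the "physical_" options prefix (set above) and the FETI-DP solve the "fluxes_" prefix,
   so the two solvers can be customized independently from the command line, e.g. (a sketch drawn from the
   tests at the end of this file):
     -physical_pc_bddc_levels 1 -physical_ksp_view -fluxes_ksp_max_it 10 */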
924: static PetscErrorCode InitializeDomainData(DomainData *dd)
925: {
926: PetscMPIInt sizes,rank;
927: PetscInt factor;
930: dd->gcomm = PETSC_COMM_WORLD;
931: MPI_Comm_size(dd->gcomm,&sizes);
932: MPI_Comm_rank(dd->gcomm,&rank);
933: /* Get information from command line */
934: /* Processors/subdomains per dimension */
935: /* Default is 1d problem */
936: dd->npx = sizes;
937: dd->npy = 0;
938: dd->npz = 0;
939: dd->dim = 1;
940: PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);
941: PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);
942: PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);
943: if (dd->npy) dd->dim++;
944: if (dd->npz) dd->dim++;
945: /* Number of elements per dimension */
946: /* Default is one element per subdomain */
947: dd->nex = dd->npx;
948: dd->ney = dd->npy;
949: dd->nez = dd->npz;
950: PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);
951: PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);
952: PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);
953: if (!dd->npy) {
954: dd->ney=0;
955: dd->nez=0;
956: }
957: if (!dd->npz) dd->nez=0;
958: /* Spectral degree */
959: dd->p = 3;
960: PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);
961: /* pure Neumann problem? */
962: dd->pure_neumann = PETSC_FALSE;
963: PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);
965: /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
966: dd->DBC_zerorows = PETSC_FALSE;
968: PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);
969: if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
970: dd->scalingfactor = 1.0;
972: factor = 0;
973: PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);
974: /* checkerboard pattern */
975: dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
976: /* test data passed in */
977: if (dd->dim==1) {
980: } else if (dd->dim==2) {
983: } else {
986: }
987: return 0;
988: }
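/* The -jump option sets up a checkerboard of coefficients across MPI ranks: with -jump 2, even ranks scale
   their subdomain matrix by 10^2 and odd ranks by 10^-2 (applied by MatScale in ComputeMatrix and passed to
   the preconditioner through PCISSetSubdomainScalingFactor in ComputeKSPBDDC). */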
990: int main(int argc,char **args)
991: {
992: DomainData dd;
993: PetscReal norm,maxeig,mineig;
994: PetscScalar scalar_value;
995: PetscInt ndofs,its;
996: Mat A = NULL,F = NULL;
997: KSP KSPwithBDDC = NULL,KSPwithFETIDP = NULL;
998: KSPConvergedReason reason;
999: Vec exact_solution = NULL,bddc_solution = NULL,bddc_rhs = NULL;
1000: PetscBool testfetidp = PETSC_TRUE;
1002: /* Init PETSc */
1003: PetscInitialize(&argc,&args,(char*)0,help);
1004: /* Initialize DomainData */
1005: InitializeDomainData(&dd);
1006: /* Decompose domain */
1007: DomainDecomposition(&dd);
1008: #if DEBUG
1009: printf("Subdomain data\n");
1010: printf("IPS : %d %d %d\n",dd.ipx,dd.ipy,dd.ipz);
1011: printf("NEG : %d %d %d\n",dd.nex,dd.ney,dd.nez);
1012: printf("NEL : %d %d %d\n",dd.nex_l,dd.ney_l,dd.nez_l);
1013: printf("LDO : %d %d %d\n",dd.xm_l,dd.ym_l,dd.zm_l);
1014: printf("SIZES : %d %d %d\n",dd.xm,dd.ym,dd.zm);
1015: printf("STARTS: %d %d %d\n",dd.startx,dd.starty,dd.startz);
1016: #endif
1017: dd.testkspfetidp = PETSC_TRUE;
1018: PetscOptionsGetBool(NULL,NULL,"-testfetidp",&testfetidp,NULL);
1019: PetscOptionsGetBool(NULL,NULL,"-testkspfetidp",&dd.testkspfetidp,NULL);
1020: /* assemble global matrix */
1021: ComputeMatrix(dd,&A);
1022: /* get work vectors */
1023: MatCreateVecs(A,&bddc_solution,NULL);
1024: VecDuplicate(bddc_solution,&bddc_rhs);
1025: VecDuplicate(bddc_solution,&exact_solution);
1026: /* create and customize KSP/PC for BDDC */
1027: ComputeKSPBDDC(dd,A,&KSPwithBDDC);
1028: /* create KSP/PC for FETIDP */
1029: if (testfetidp) {
1030: ComputeKSPFETIDP(dd,KSPwithBDDC,&KSPwithFETIDP);
1031: }
1032: /* create random exact solution */
1033: #if defined(PETSC_USE_COMPLEX)
1034: VecSet(exact_solution,1.0 + PETSC_i);
1035: #else
1036: VecSetRandom(exact_solution,NULL);
1037: #endif
1038: VecShift(exact_solution,-0.5);
1039: VecScale(exact_solution,100.0);
1040: VecGetSize(exact_solution,&ndofs);
1041: if (dd.pure_neumann) {
1042: VecSum(exact_solution,&scalar_value);
1043: scalar_value = -scalar_value/(PetscScalar)ndofs;
1044: VecShift(exact_solution,scalar_value);
1045: }
1046: /* assemble BDDC rhs */
1047: MatMult(A,exact_solution,bddc_rhs);
1048: /* test ksp with BDDC */
1049: KSPSolve(KSPwithBDDC,bddc_rhs,bddc_solution);
1050: KSPGetIterationNumber(KSPwithBDDC,&its);
1051: KSPGetConvergedReason(KSPwithBDDC,&reason);
1052: KSPComputeExtremeSingularValues(KSPwithBDDC,&maxeig,&mineig);
1053: if (dd.pure_neumann) {
1054: VecSum(bddc_solution,&scalar_value);
1055: scalar_value = -scalar_value/(PetscScalar)ndofs;
1056: VecShift(bddc_solution,scalar_value);
1057: }
1058: /* check exact_solution and BDDC solution */
1059: VecAXPY(bddc_solution,-1.0,exact_solution);
1060: VecNorm(bddc_solution,NORM_INFINITY,&norm);
1061: PetscPrintf(dd.gcomm,"---------------------BDDC stats-------------------------------\n");
1062: PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);
1063: if (reason < 0) {
1064: PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);
1065: PetscPrintf(dd.gcomm,"Converged reason : %s\n",KSPConvergedReasons[reason]);
1066: }
1067: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1068: PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1069: if (norm > 1.e-1 || reason < 0) {
1070: PetscPrintf(dd.gcomm,"Error between exact and computed solution : %1.2e\n",(double)norm);
1071: }
1072: PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1073: if (testfetidp) {
1074: Vec fetidp_solution_all = NULL,fetidp_solution = NULL,fetidp_rhs = NULL;
1076: VecDuplicate(bddc_solution,&fetidp_solution_all);
1077: if (!dd.testkspfetidp) {
1078: /* assemble fetidp rhs on the space of Lagrange multipliers */
1079: KSPGetOperators(KSPwithFETIDP,&F,NULL);
1080: MatCreateVecs(F,&fetidp_solution,&fetidp_rhs);
1081: PCBDDCMatFETIDPGetRHS(F,bddc_rhs,fetidp_rhs);
1082: VecSet(fetidp_solution,0.0);
1083: /* test ksp with FETIDP */
1084: KSPSolve(KSPwithFETIDP,fetidp_rhs,fetidp_solution);
1085: /* assemble fetidp solution on physical domain */
1086: PCBDDCMatFETIDPGetSolution(F,fetidp_solution,fetidp_solution_all);
1087: } else {
1088: KSP kspF;
1089: KSPSolve(KSPwithFETIDP,bddc_rhs,fetidp_solution_all);
1090: KSPFETIDPGetInnerKSP(KSPwithFETIDP,&kspF);
1091: KSPGetOperators(kspF,&F,NULL);
1092: }
1093: MatGetSize(F,&ndofs,NULL);
1094: KSPGetIterationNumber(KSPwithFETIDP,&its);
1095: KSPGetConvergedReason(KSPwithFETIDP,&reason);
1096: KSPComputeExtremeSingularValues(KSPwithFETIDP,&maxeig,&mineig);
1097: /* check FETIDP sol */
1098: if (dd.pure_neumann) {
1099: VecSum(fetidp_solution_all,&scalar_value);
1100: scalar_value = -scalar_value/(PetscScalar)ndofs;
1101: VecShift(fetidp_solution_all,scalar_value);
1102: }
1103: VecAXPY(fetidp_solution_all,-1.0,exact_solution);
1104: VecNorm(fetidp_solution_all,NORM_INFINITY,&norm);
1105: PetscPrintf(dd.gcomm,"------------------FETI-DP stats-------------------------------\n");
1106: PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);
1107: if (reason < 0) {
1108: PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);
1109: PetscPrintf(dd.gcomm,"Converged reason : %s\n",KSPConvergedReasons[reason]);
1110: }
1111: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1112: PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1113: if (norm > 1.e-1 || reason < 0) {
1114: PetscPrintf(dd.gcomm,"Error between exact and computed solution : %1.2e\n",(double)norm);
1115: }
1116: PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1117: VecDestroy(&fetidp_solution);
1118: VecDestroy(&fetidp_solution_all);
1119: VecDestroy(&fetidp_rhs);
1120: }
1121: KSPDestroy(&KSPwithFETIDP);
1122: VecDestroy(&exact_solution);
1123: VecDestroy(&bddc_solution);
1124: VecDestroy(&bddc_rhs);
1125: MatDestroy(&A);
1126: KSPDestroy(&KSPwithBDDC);
1127: /* Quit PETSc */
1128: PetscFinalize();
1129: return 0;
1130: }
1132: /*TEST
1134: testset:
1135: nsize: 4
1136: args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1137: output_file: output/ex59_bddc_fetidp_1.out
1138: test:
1139: suffix: bddc_fetidp_1
1140: test:
1141: requires: viennacl
1142: suffix: bddc_fetidp_1_viennacl
1143: args: -subdomain_mat_type aijviennacl
1144: test:
1145: requires: cuda
1146: suffix: bddc_fetidp_1_cuda
1147: args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1149: testset:
1150: nsize: 4
1151: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1152: requires: !single
1153: test:
1154: suffix: bddc_fetidp_2
1155: test:
1156: suffix: bddc_fetidp_3
1157: args: -npz 1 -nez 1
1158: test:
1159: suffix: bddc_fetidp_4
1160: args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1162: testset:
1163: nsize: 8
1164: suffix: bddc_fetidp_approximate
1165: args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1167: testset:
1168: nsize: 4
1169: args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1170: filter: grep -v "variant HERMITIAN"
1171: requires: !single
1172: test:
1173: suffix: bddc_fetidp_ml_1
1174: args: -physical_pc_bddc_coarsening_ratio 1
1175: test:
1176: suffix: bddc_fetidp_ml_2
1177: args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1178: test:
1179: suffix: bddc_fetidp_ml_3
1180: args: -physical_pc_bddc_coarsening_ratio 4
1182: testset:
1183: nsize: 9
1184: args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1185: output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1186: test:
1187: suffix: bddc_fetidp_ml_eqlimit_1
1188: args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1189: test:
1190: suffix: bddc_fetidp_ml_eqlimit_2
1191: args: -physical_pc_bddc_coarse_eqs_limit 46
1193: TEST*/