Actual source code: superlu.c
2: /* --------------------------------------------------------------------
4: This file implements a subclass of the SeqAIJ matrix class that uses
5: the SuperLU sparse solver.
6: */
8: /*
9: Defines the data structure for the base matrix type (SeqAIJ)
10: */
11: #include <../src/mat/impls/aij/seq/aij.h>
13: /*
14: SuperLU include files
15: */
16: EXTERN_C_BEGIN
17: #if defined(PETSC_USE_COMPLEX)
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <slu_cdefs.h>
20: #else
21: #include <slu_zdefs.h>
22: #endif
23: #else
24: #if defined(PETSC_USE_REAL_SINGLE)
25: #include <slu_sdefs.h>
26: #else
27: #include <slu_ddefs.h>
28: #endif
29: #endif
30: #include <slu_util.h>
31: EXTERN_C_END
33: /*
34: This is the data that defines the SuperLU factored matrix type
35: */
36: typedef struct {
37: SuperMatrix A, L, U, B, X;
38: superlu_options_t options;
39: PetscInt *perm_c; /* column permutation vector */
40: PetscInt *perm_r; /* row permutations from partial pivoting */
41: PetscInt *etree;
42: PetscReal *R, *C;
43: char equed[1];
44: PetscInt lwork;
45: void *work;
46: PetscReal rpg, rcond;
47: mem_usage_t mem_usage;
48: MatStructure flg;
49: SuperLUStat_t stat;
50: Mat A_dup;
51: PetscScalar *rhs_dup;
52: GlobalLU_t Glu;
53: PetscBool needconversion;
55: /* Flag to clean up (non-global) SuperLU objects during Destroy */
56: PetscBool CleanUpSuperLU;
57: } Mat_SuperLU;
59: /*
60: Utility function
61: */
62: static PetscErrorCode MatView_Info_SuperLU(Mat A, PetscViewer viewer)
63: {
64: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
65: superlu_options_t options;
67: options = lu->options;
69: PetscViewerASCIIPrintf(viewer, "SuperLU run parameters:\n");
70: PetscViewerASCIIPrintf(viewer, " Equil: %s\n", (options.Equil != NO) ? "YES" : "NO");
71: PetscViewerASCIIPrintf(viewer, " ColPerm: %" PetscInt_FMT "\n", options.ColPerm);
72: PetscViewerASCIIPrintf(viewer, " IterRefine: %" PetscInt_FMT "\n", options.IterRefine);
73: PetscViewerASCIIPrintf(viewer, " SymmetricMode: %s\n", (options.SymmetricMode != NO) ? "YES" : "NO");
74: PetscViewerASCIIPrintf(viewer, " DiagPivotThresh: %g\n", options.DiagPivotThresh);
75: PetscViewerASCIIPrintf(viewer, " PivotGrowth: %s\n", (options.PivotGrowth != NO) ? "YES" : "NO");
76: PetscViewerASCIIPrintf(viewer, " ConditionNumber: %s\n", (options.ConditionNumber != NO) ? "YES" : "NO");
77: PetscViewerASCIIPrintf(viewer, " RowPerm: %" PetscInt_FMT "\n", options.RowPerm);
78: PetscViewerASCIIPrintf(viewer, " ReplaceTinyPivot: %s\n", (options.ReplaceTinyPivot != NO) ? "YES" : "NO");
79: PetscViewerASCIIPrintf(viewer, " PrintStat: %s\n", (options.PrintStat != NO) ? "YES" : "NO");
80: PetscViewerASCIIPrintf(viewer, " lwork: %" PetscInt_FMT "\n", lu->lwork);
81: if (A->factortype == MAT_FACTOR_ILU) {
82: PetscViewerASCIIPrintf(viewer, " ILU_DropTol: %g\n", options.ILU_DropTol);
83: PetscViewerASCIIPrintf(viewer, " ILU_FillTol: %g\n", options.ILU_FillTol);
84: PetscViewerASCIIPrintf(viewer, " ILU_FillFactor: %g\n", options.ILU_FillFactor);
85: PetscViewerASCIIPrintf(viewer, " ILU_DropRule: %" PetscInt_FMT "\n", options.ILU_DropRule);
86: PetscViewerASCIIPrintf(viewer, " ILU_Norm: %" PetscInt_FMT "\n", options.ILU_Norm);
87: PetscViewerASCIIPrintf(viewer, " ILU_MILU: %" PetscInt_FMT "\n", options.ILU_MILU);
88: }
89: return 0;
90: }
92: PetscErrorCode MatSolve_SuperLU_Private(Mat A, Vec b, Vec x)
93: {
94: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
95: const PetscScalar *barray;
96: PetscScalar *xarray;
97: PetscInt info, i, n;
98: PetscReal ferr, berr;
99: static PetscBool cite = PETSC_FALSE;
101: if (lu->lwork == -1) return 0;
102: PetscCall(PetscCitationsRegister("@article{superlu99,\n author = {James W. Demmel and Stanley C. Eisenstat and\n John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n title = {A supernodal approach to sparse partial "
103: "pivoting},\n journal = {SIAM J. Matrix Analysis and Applications},\n year = {1999},\n volume = {20},\n number = {3},\n pages = {720-755}\n}\n",
104: &cite));
106: VecGetLocalSize(x, &n);
107: lu->B.ncol = 1; /* Set the number of right-hand side */
108: if (lu->options.Equil && !lu->rhs_dup) {
109: /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
110: PetscMalloc1(n, &lu->rhs_dup);
111: }
112: if (lu->options.Equil) {
113: /* Copy b into rsh_dup */
114: VecGetArrayRead(b, &barray);
115: PetscArraycpy(lu->rhs_dup, barray, n);
116: VecRestoreArrayRead(b, &barray);
117: barray = lu->rhs_dup;
118: } else {
119: VecGetArrayRead(b, &barray);
120: }
121: VecGetArray(x, &xarray);
123: #if defined(PETSC_USE_COMPLEX)
124: #if defined(PETSC_USE_REAL_SINGLE)
125: ((DNformat *)lu->B.Store)->nzval = (singlecomplex *)barray;
126: ((DNformat *)lu->X.Store)->nzval = (singlecomplex *)xarray;
127: #else
128: ((DNformat *)lu->B.Store)->nzval = (doublecomplex *)barray;
129: ((DNformat *)lu->X.Store)->nzval = (doublecomplex *)xarray;
130: #endif
131: #else
132: ((DNformat *)lu->B.Store)->nzval = (void *)barray;
133: ((DNformat *)lu->X.Store)->nzval = xarray;
134: #endif
136: lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
137: if (A->factortype == MAT_FACTOR_LU) {
138: #if defined(PETSC_USE_COMPLEX)
139: #if defined(PETSC_USE_REAL_SINGLE)
140: PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
141: #else
142: PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143: #endif
144: #else
145: #if defined(PETSC_USE_REAL_SINGLE)
146: PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
147: #else
148: PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149: #endif
150: #endif
151: } else if (A->factortype == MAT_FACTOR_ILU) {
152: #if defined(PETSC_USE_COMPLEX)
153: #if defined(PETSC_USE_REAL_SINGLE)
154: PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
155: #else
156: PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
157: #endif
158: #else
159: #if defined(PETSC_USE_REAL_SINGLE)
160: PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
161: #else
162: PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
163: #endif
164: #endif
165: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
166: if (!lu->options.Equil) VecRestoreArrayRead(b, &barray);
167: VecRestoreArray(x, &xarray);
169: if (!info || info == lu->A.ncol + 1) {
170: if (lu->options.IterRefine) {
171: PetscPrintf(PETSC_COMM_SELF, "Iterative Refinement:\n");
172: PetscPrintf(PETSC_COMM_SELF, " %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
173: for (i = 0; i < 1; ++i) PetscPrintf(PETSC_COMM_SELF, " %8d%8d%16e%16e\n", i + 1, lu->stat.RefineSteps, ferr, berr);
174: }
175: } else if (info > 0) {
176: if (lu->lwork == -1) {
177: PetscPrintf(PETSC_COMM_SELF, " ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol);
178: } else {
179: PetscPrintf(PETSC_COMM_SELF, " Warning: gssvx() returns info %" PetscInt_FMT "\n", info);
180: }
183: if (lu->options.PrintStat) {
184: PetscPrintf(PETSC_COMM_SELF, "MatSolve__SuperLU():\n");
185: PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
186: }
187: return 0;
188: }
190: PetscErrorCode MatSolve_SuperLU(Mat A, Vec b, Vec x)
191: {
192: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
194: if (A->factorerrortype) {
195: PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n");
196: VecSetInf(x);
197: return 0;
198: }
200: lu->options.Trans = TRANS;
201: MatSolve_SuperLU_Private(A, b, x);
202: return 0;
203: }
205: PetscErrorCode MatSolveTranspose_SuperLU(Mat A, Vec b, Vec x)
206: {
207: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
209: if (A->factorerrortype) {
210: PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n");
211: VecSetInf(x);
212: return 0;
213: }
215: lu->options.Trans = NOTRANS;
216: MatSolve_SuperLU_Private(A, b, x);
217: return 0;
218: }
220: static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F, Mat A, const MatFactorInfo *info)
221: {
222: Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
223: Mat_SeqAIJ *aa;
224: PetscInt sinfo;
225: PetscReal ferr, berr;
226: NCformat *Ustore;
227: SCformat *Lstore;
229: if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
230: lu->options.Fact = SamePattern;
231: /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
232: Destroy_SuperMatrix_Store(&lu->A);
233: if (lu->A_dup) MatCopy_SeqAIJ(A, lu->A_dup, SAME_NONZERO_PATTERN);
235: if (lu->lwork >= 0) {
236: PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
237: PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
238: lu->options.Fact = SamePattern;
239: }
240: }
242: /* Create the SuperMatrix for lu->A=A^T:
243: Since SuperLU likes column-oriented matrices,we pass it the transpose,
244: and then solve A^T X = B in MatSolve(). */
245: if (lu->A_dup) {
246: aa = (Mat_SeqAIJ *)(lu->A_dup)->data;
247: } else {
248: aa = (Mat_SeqAIJ *)(A)->data;
249: }
250: #if defined(PETSC_USE_COMPLEX)
251: #if defined(PETSC_USE_REAL_SINGLE)
252: PetscStackCallExternalVoid("SuperLU:cCreate_CompCol_Matrix", cCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (singlecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_C, SLU_GE));
253: #else
254: PetscStackCallExternalVoid("SuperLU:zCreate_CompCol_Matrix", zCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (doublecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_Z, SLU_GE));
255: #endif
256: #else
257: #if defined(PETSC_USE_REAL_SINGLE)
258: PetscStackCallExternalVoid("SuperLU:sCreate_CompCol_Matrix", sCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_S, SLU_GE));
259: #else
260: PetscStackCallExternalVoid("SuperLU:dCreate_CompCol_Matrix", dCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_D, SLU_GE));
261: #endif
262: #endif
264: /* Numerical factorization */
265: lu->B.ncol = 0; /* Indicate not to solve the system */
266: if (F->factortype == MAT_FACTOR_LU) {
267: #if defined(PETSC_USE_COMPLEX)
268: #if defined(PETSC_USE_REAL_SINGLE)
269: PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
270: #else
271: PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
272: #endif
273: #else
274: #if defined(PETSC_USE_REAL_SINGLE)
275: PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
276: #else
277: PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
278: #endif
279: #endif
280: } else if (F->factortype == MAT_FACTOR_ILU) {
281: /* Compute the incomplete factorization, condition number and pivot growth */
282: #if defined(PETSC_USE_COMPLEX)
283: #if defined(PETSC_USE_REAL_SINGLE)
284: PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
285: #else
286: PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
287: #endif
288: #else
289: #if defined(PETSC_USE_REAL_SINGLE)
290: PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
291: #else
292: PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
293: #endif
294: #endif
295: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
296: if (!sinfo || sinfo == lu->A.ncol + 1) {
297: if (lu->options.PivotGrowth) PetscPrintf(PETSC_COMM_SELF, " Recip. pivot growth = %e\n", lu->rpg);
298: if (lu->options.ConditionNumber) PetscPrintf(PETSC_COMM_SELF, " Recip. condition number = %e\n", lu->rcond);
299: } else if (sinfo > 0) {
300: if (A->erroriffailure) {
301: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %" PetscInt_FMT, sinfo);
302: } else {
303: if (sinfo <= lu->A.ncol) {
304: if (lu->options.ILU_FillTol == 0.0) F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
305: PetscInfo(F, "Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n", sinfo, lu->options.ILU_FillTol);
306: } else if (sinfo == lu->A.ncol + 1) {
307: /*
308: U is nonsingular, but RCOND is less than machine
309: precision, meaning that the matrix is singular to
310: working precision. Nevertheless, the solution and
311: error bounds are computed because there are a number
312: of situations where the computed solution can be more
313: accurate than the value of RCOND would suggest.
314: */
315: PetscInfo(F, "Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT, sinfo);
316: } else { /* sinfo > lu->A.ncol + 1 */
317: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
318: PetscInfo(F, "Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n", sinfo);
319: }
320: }
321: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo, -sinfo);
323: if (lu->options.PrintStat) {
324: PetscPrintf(PETSC_COMM_SELF, "MatLUFactorNumeric_SuperLU():\n");
325: PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
326: Lstore = (SCformat *)lu->L.Store;
327: Ustore = (NCformat *)lu->U.Store;
328: PetscPrintf(PETSC_COMM_SELF, " No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz);
329: PetscPrintf(PETSC_COMM_SELF, " No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz);
330: PetscPrintf(PETSC_COMM_SELF, " No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
331: PetscPrintf(PETSC_COMM_SELF, " L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu / 1e6, lu->mem_usage.total_needed / 1e6);
332: }
334: lu->flg = SAME_NONZERO_PATTERN;
335: F->ops->solve = MatSolve_SuperLU;
336: F->ops->solvetranspose = MatSolveTranspose_SuperLU;
337: F->ops->matsolve = NULL;
338: return 0;
339: }
341: static PetscErrorCode MatDestroy_SuperLU(Mat A)
342: {
343: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
345: if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
346: PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->A));
347: PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->B));
348: PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->X));
349: PetscStackCallExternalVoid("SuperLU:StatFree", StatFree(&lu->stat));
350: if (lu->lwork >= 0) {
351: PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
352: PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
353: }
354: }
355: PetscFree(lu->etree);
356: PetscFree(lu->perm_r);
357: PetscFree(lu->perm_c);
358: PetscFree(lu->R);
359: PetscFree(lu->C);
360: PetscFree(lu->rhs_dup);
361: MatDestroy(&lu->A_dup);
362: PetscFree(A->data);
364: /* clear composed functions */
365: PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
366: PetscObjectComposeFunction((PetscObject)A, "MatSuperluSetILUDropTol_C", NULL);
367: return 0;
368: }
370: static PetscErrorCode MatView_SuperLU(Mat A, PetscViewer viewer)
371: {
372: PetscBool iascii;
373: PetscViewerFormat format;
375: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
376: if (iascii) {
377: PetscViewerGetFormat(viewer, &format);
378: if (format == PETSC_VIEWER_ASCII_INFO) MatView_Info_SuperLU(A, viewer);
379: }
380: return 0;
381: }
383: PetscErrorCode MatMatSolve_SuperLU(Mat A, Mat B, Mat X)
384: {
385: Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
386: PetscBool flg;
388: PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
390: PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
392: lu->options.Trans = TRANS;
393: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatSolve_SuperLU() is not implemented yet");
394: return 0;
395: }
397: static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
398: {
399: Mat_SuperLU *lu = (Mat_SuperLU *)(F->data);
400: PetscInt indx;
401: PetscBool flg, set;
402: PetscReal real_input;
403: const char *colperm[] = {"NATURAL", "MMD_ATA", "MMD_AT_PLUS_A", "COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
404: const char *iterrefine[] = {"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
405: const char *rowperm[] = {"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
407: /* Set options to F */
408: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU Options", "Mat");
409: PetscOptionsBool("-mat_superlu_equil", "Equil", "None", (PetscBool)lu->options.Equil, (PetscBool *)&lu->options.Equil, NULL);
410: PetscOptionsEList("-mat_superlu_colperm", "ColPerm", "None", colperm, 4, colperm[3], &indx, &flg);
411: if (flg) lu->options.ColPerm = (colperm_t)indx;
412: PetscOptionsEList("-mat_superlu_iterrefine", "IterRefine", "None", iterrefine, 4, iterrefine[0], &indx, &flg);
413: if (flg) lu->options.IterRefine = (IterRefine_t)indx;
414: PetscOptionsBool("-mat_superlu_symmetricmode", "SymmetricMode", "None", (PetscBool)lu->options.SymmetricMode, &flg, &set);
415: if (set && flg) lu->options.SymmetricMode = YES;
416: PetscOptionsReal("-mat_superlu_diagpivotthresh", "DiagPivotThresh", "None", lu->options.DiagPivotThresh, &real_input, &flg);
417: if (flg) lu->options.DiagPivotThresh = (double)real_input;
418: PetscOptionsBool("-mat_superlu_pivotgrowth", "PivotGrowth", "None", (PetscBool)lu->options.PivotGrowth, &flg, &set);
419: if (set && flg) lu->options.PivotGrowth = YES;
420: PetscOptionsBool("-mat_superlu_conditionnumber", "ConditionNumber", "None", (PetscBool)lu->options.ConditionNumber, &flg, &set);
421: if (set && flg) lu->options.ConditionNumber = YES;
422: PetscOptionsEList("-mat_superlu_rowperm", "rowperm", "None", rowperm, 2, rowperm[lu->options.RowPerm], &indx, &flg);
423: if (flg) lu->options.RowPerm = (rowperm_t)indx;
424: PetscOptionsBool("-mat_superlu_replacetinypivot", "ReplaceTinyPivot", "None", (PetscBool)lu->options.ReplaceTinyPivot, &flg, &set);
425: if (set && flg) lu->options.ReplaceTinyPivot = YES;
426: PetscOptionsBool("-mat_superlu_printstat", "PrintStat", "None", (PetscBool)lu->options.PrintStat, &flg, &set);
427: if (set && flg) lu->options.PrintStat = YES;
428: PetscOptionsInt("-mat_superlu_lwork", "size of work array in bytes used by factorization", "None", lu->lwork, &lu->lwork, NULL);
429: if (lu->lwork > 0) {
430: /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
431: PetscMalloc(lu->lwork, &lu->work);
432: } else if (lu->lwork != 0 && lu->lwork != -1) {
433: PetscPrintf(PETSC_COMM_SELF, " Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n", lu->lwork);
434: lu->lwork = 0;
435: }
436: /* ilu options */
437: PetscOptionsReal("-mat_superlu_ilu_droptol", "ILU_DropTol", "None", lu->options.ILU_DropTol, &real_input, &flg);
438: if (flg) lu->options.ILU_DropTol = (double)real_input;
439: PetscOptionsReal("-mat_superlu_ilu_filltol", "ILU_FillTol", "None", lu->options.ILU_FillTol, &real_input, &flg);
440: if (flg) lu->options.ILU_FillTol = (double)real_input;
441: PetscOptionsReal("-mat_superlu_ilu_fillfactor", "ILU_FillFactor", "None", lu->options.ILU_FillFactor, &real_input, &flg);
442: if (flg) lu->options.ILU_FillFactor = (double)real_input;
443: PetscOptionsInt("-mat_superlu_ilu_droprull", "ILU_DropRule", "None", lu->options.ILU_DropRule, &lu->options.ILU_DropRule, NULL);
444: PetscOptionsInt("-mat_superlu_ilu_norm", "ILU_Norm", "None", lu->options.ILU_Norm, &indx, &flg);
445: if (flg) lu->options.ILU_Norm = (norm_t)indx;
446: PetscOptionsInt("-mat_superlu_ilu_milu", "ILU_MILU", "None", lu->options.ILU_MILU, &indx, &flg);
447: if (flg) lu->options.ILU_MILU = (milu_t)indx;
448: PetscOptionsEnd();
450: lu->flg = DIFFERENT_NONZERO_PATTERN;
451: lu->CleanUpSuperLU = PETSC_TRUE;
452: F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
454: /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
455: MatDestroy(&lu->A_dup);
456: if (lu->needconversion) MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &lu->A_dup);
457: if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
458: MatDuplicate_SeqAIJ(A, MAT_COPY_VALUES, &lu->A_dup);
459: }
460: return 0;
461: }
463: static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F, PetscReal dtol)
464: {
465: Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
467: lu->options.ILU_DropTol = dtol;
468: return 0;
469: }
471: /*@
472: MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
474: Logically Collective
476: Input Parameters:
477: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-SuperLU interface
478: - dtol - drop tolerance
480: Options Database Key:
481: . -mat_superlu_ilu_droptol <dtol> - the drop tolerance
483: Level: beginner
485: References:
486: . * - SuperLU Users' Guide
488: .seealso: `MatGetFactor()`
489: @*/
490: PetscErrorCode MatSuperluSetILUDropTol(Mat F, PetscReal dtol)
491: {
494: PetscTryMethod(F, "MatSuperluSetILUDropTol_C", (Mat, PetscReal), (F, dtol));
495: return 0;
496: }
498: PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A, MatSolverType *type)
499: {
500: *type = MATSOLVERSUPERLU;
501: return 0;
502: }
504: /*MC
505: MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
506: via the external package SuperLU.
508: Use ./configure --download-superlu to have PETSc installed with SuperLU
510: Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver
512: Options Database Keys:
513: + -mat_superlu_equil <FALSE> - Equil (None)
514: . -mat_superlu_colperm <COLAMD> - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
515: . -mat_superlu_iterrefine <NOREFINE> - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
516: . -mat_superlu_symmetricmode: <FALSE> - SymmetricMode (None)
517: . -mat_superlu_diagpivotthresh <1> - DiagPivotThresh (None)
518: . -mat_superlu_pivotgrowth <FALSE> - PivotGrowth (None)
519: . -mat_superlu_conditionnumber <FALSE> - ConditionNumber (None)
520: . -mat_superlu_rowperm <NOROWPERM> - (choose one of) NOROWPERM LargeDiag
521: . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
522: . -mat_superlu_printstat <FALSE> - PrintStat (None)
523: . -mat_superlu_lwork <0> - size of work array in bytes used by factorization (None)
524: . -mat_superlu_ilu_droptol <0> - ILU_DropTol (None)
525: . -mat_superlu_ilu_filltol <0> - ILU_FillTol (None)
526: . -mat_superlu_ilu_fillfactor <0> - ILU_FillFactor (None)
527: . -mat_superlu_ilu_droprull <0> - ILU_DropRule (None)
528: . -mat_superlu_ilu_norm <0> - ILU_Norm (None)
529: - -mat_superlu_ilu_milu <0> - ILU_MILU (None)
531: Notes:
532: Do not confuse this with `MATSOLVERSUPERLU_DIST` which is for parallel sparse solves
534: Cannot use ordering provided by PETSc, provides its own.
536: Level: beginner
538: .seealso: `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
539: M*/
541: static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A, MatFactorType ftype, Mat *F)
542: {
543: Mat B;
544: Mat_SuperLU *lu;
545: PetscInt m = A->rmap->n, n = A->cmap->n;
547: MatCreate(PetscObjectComm((PetscObject)A), &B);
548: MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DETERMINE, PETSC_DETERMINE);
549: PetscStrallocpy("superlu", &((PetscObject)B)->type_name);
550: MatSetUp(B);
551: B->trivialsymbolic = PETSC_TRUE;
552: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
553: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU;
554: B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
555: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
557: PetscFree(B->solvertype);
558: PetscStrallocpy(MATSOLVERSUPERLU, &B->solvertype);
560: B->ops->getinfo = MatGetInfo_External;
561: B->ops->destroy = MatDestroy_SuperLU;
562: B->ops->view = MatView_SuperLU;
563: B->factortype = ftype;
564: B->assembled = PETSC_TRUE; /* required by -ksp_view */
565: B->preallocated = PETSC_TRUE;
567: PetscNew(&lu);
569: if (ftype == MAT_FACTOR_LU) {
570: set_default_options(&lu->options);
571: /* Comments from SuperLU_4.0/SRC/dgssvx.c:
572: "Whether or not the system will be equilibrated depends on the
573: scaling of the matrix A, but if equilibration is used, A is
574: overwritten by diag(R)*A*diag(C) and B by diag(R)*B
575: (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
576: We set 'options.Equil = NO' as default because additional space is needed for it.
577: */
578: lu->options.Equil = NO;
579: } else if (ftype == MAT_FACTOR_ILU) {
580: /* Set the default input options of ilu: */
581: PetscStackCallExternalVoid("SuperLU:ilu_set_default_options", ilu_set_default_options(&lu->options));
582: }
583: lu->options.PrintStat = NO;
585: /* Initialize the statistics variables. */
586: PetscStackCallExternalVoid("SuperLU:StatInit", StatInit(&lu->stat));
587: lu->lwork = 0; /* allocate space internally by system malloc */
589: /* Allocate spaces (notice sizes are for the transpose) */
590: PetscMalloc1(m, &lu->etree);
591: PetscMalloc1(n, &lu->perm_r);
592: PetscMalloc1(m, &lu->perm_c);
593: PetscMalloc1(n, &lu->R);
594: PetscMalloc1(m, &lu->C);
596: /* create rhs and solution x without allocate space for .Store */
597: #if defined(PETSC_USE_COMPLEX)
598: #if defined(PETSC_USE_REAL_SINGLE)
599: PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
600: PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
601: #else
602: PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
603: PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
604: #endif
605: #else
606: #if defined(PETSC_USE_REAL_SINGLE)
607: PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
608: PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
609: #else
610: PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
611: PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
612: #endif
613: #endif
615: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_seqaij_superlu);
616: PetscObjectComposeFunction((PetscObject)B, "MatSuperluSetILUDropTol_C", MatSuperluSetILUDropTol_SuperLU);
617: B->data = lu;
619: *F = B;
620: return 0;
621: }
623: static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A, MatFactorType ftype, Mat *F)
624: {
625: Mat_SuperLU *lu;
627: MatGetFactor_seqaij_superlu(A, ftype, F);
628: lu = (Mat_SuperLU *)((*F)->data);
629: lu->needconversion = PETSC_TRUE;
630: return 0;
631: }
633: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
634: {
635: MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_seqaij_superlu);
636: MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_ILU, MatGetFactor_seqaij_superlu);
637: MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_seqsell_superlu);
638: MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_ILU, MatGetFactor_seqsell_superlu);
639: return 0;
640: }