/* iispai.c */
1: /*
2: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
3: */
5: /*
6: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
7: Code written by Stephen Barnard.
   Note: there are some BAD memory leaks below!

   This code needs work

   1) get rid of all memory leaks
14: 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
15: rather than having the sp flag for PC_SPAI
16: 3) fix to set the block size based on the matrix block size
18: */
19: #if !defined(PETSC_SKIP_COMPLEX)
20: #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21: #endif
23: #include <petsc/private/pcimpl.h>
25: /*
26: These are the SPAI include files
27: */
28: EXTERN_C_BEGIN
29: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30: #include <spai.h>
31: #include <matrix.h>
32: EXTERN_C_END
34: static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
35: static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
/* Private context for the SPAI preconditioner; one instance hangs off pc->data. */
typedef struct {
  matrix *B;  /* the preconditioning matrix translated into SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format; NOTE(review): never assigned or read in this file — candidate for removal */
  matrix *M;  /* the approximate inverse computed by SPAI, in SPAI format */

  Mat PM; /* the approximate inverse converted back to PETSc format; applied in PCApply_SPAI() */

  double epsilon;    /* tolerance */
  int nbsteps;       /* max number of "improvement" steps per line */
  int max;           /* max dimensions of is_I, q, etc. */
  int maxnew;        /* max number of new entries per step */
  int block_size;    /* constant block size (1 means scalar entries) */
  int cache_size;    /* one of (1,2,3,4,5,6) indicating size of cache */
  int verbose;       /* nonzero: SPAI prints timing and statistics */

  int sp;            /* nonzero: assume symmetric nonzero pattern, so the column structure equals the row structure */
  MPI_Comm comm_spai; /* private dup of the PC's communicator, handed to SPAI; freed in PCDestroy_SPAI() */
} PC_SPAI;
56: static PetscErrorCode PCSetUp_SPAI(PC pc)
57: {
58: PC_SPAI *ispai = (PC_SPAI *)pc->data;
59: Mat AT;
61: PetscFunctionBegin;
62: init_SPAI();
64: if (ispai->sp) {
65: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
66: } else {
67: /* Use the transpose to get the column nonzero structure. */
68: PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
69: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
70: PetscCall(MatDestroy(&AT));
71: }
73: /* Destroy the transpose */
74: /* Don't know how to do it. PETSc developers? */
76: /* construct SPAI preconditioner */
77: /* FILE *messages */ /* file for warning messages */
78: /* double epsilon */ /* tolerance */
79: /* int nbsteps */ /* max number of "improvement" steps per line */
80: /* int max */ /* max dimensions of is_I, q, etc. */
81: /* int maxnew */ /* max number of new entries per step */
82: /* int block_size */ /* block_size == 1 specifies scalar elements
83: block_size == n specifies nxn constant-block elements
84: block_size == 0 specifies variable-block elements */
85: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
86: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
87: verbose == 1 prints timing and matrix statistics */
89: PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
91: PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
93: /* free the SPAI matrices */
94: sp_free_matrix(ispai->B);
95: sp_free_matrix(ispai->M);
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
100: {
101: PC_SPAI *ispai = (PC_SPAI *)pc->data;
103: PetscFunctionBegin;
104: /* Now using PETSc's multiply */
105: PetscCall(MatMult(ispai->PM, xx, y));
106: PetscFunctionReturn(PETSC_SUCCESS);
107: }
109: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
110: {
111: PC_SPAI *ispai = (PC_SPAI *)pc->data;
113: PetscFunctionBegin;
114: /* Now using PETSc's multiply */
115: PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
116: PetscFunctionReturn(PETSC_SUCCESS);
117: }
119: static PetscErrorCode PCDestroy_SPAI(PC pc)
120: {
121: PC_SPAI *ispai = (PC_SPAI *)pc->data;
123: PetscFunctionBegin;
124: PetscCall(MatDestroy(&ispai->PM));
125: PetscCallMPI(MPI_Comm_free(&ispai->comm_spai));
126: PetscCall(PetscFree(pc->data));
127: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
128: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
129: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
130: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
131: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
132: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
133: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
134: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
138: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
139: {
140: PC_SPAI *ispai = (PC_SPAI *)pc->data;
141: PetscBool isascii;
143: PetscFunctionBegin;
144: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
145: if (isascii) {
146: PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
147: PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
148: PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
149: PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
150: PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
151: PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
152: PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
153: PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
154: }
155: PetscFunctionReturn(PETSC_SUCCESS);
156: }
158: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
159: {
160: PC_SPAI *ispai = (PC_SPAI *)pc->data;
162: PetscFunctionBegin;
163: ispai->epsilon = (double)epsilon1;
164: PetscFunctionReturn(PETSC_SUCCESS);
165: }
167: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
168: {
169: PC_SPAI *ispai = (PC_SPAI *)pc->data;
171: PetscFunctionBegin;
172: ispai->nbsteps = (int)nbsteps1;
173: PetscFunctionReturn(PETSC_SUCCESS);
174: }
176: /* added 1/7/99 g.h. */
177: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
178: {
179: PC_SPAI *ispai = (PC_SPAI *)pc->data;
181: PetscFunctionBegin;
182: ispai->max = (int)max1;
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
187: {
188: PC_SPAI *ispai = (PC_SPAI *)pc->data;
190: PetscFunctionBegin;
191: ispai->maxnew = (int)maxnew1;
192: PetscFunctionReturn(PETSC_SUCCESS);
193: }
195: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
196: {
197: PC_SPAI *ispai = (PC_SPAI *)pc->data;
199: PetscFunctionBegin;
200: ispai->block_size = (int)block_size1;
201: PetscFunctionReturn(PETSC_SUCCESS);
202: }
204: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
205: {
206: PC_SPAI *ispai = (PC_SPAI *)pc->data;
208: PetscFunctionBegin;
209: ispai->cache_size = (int)cache_size;
210: PetscFunctionReturn(PETSC_SUCCESS);
211: }
213: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
214: {
215: PC_SPAI *ispai = (PC_SPAI *)pc->data;
217: PetscFunctionBegin;
218: ispai->verbose = (int)verbose;
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
222: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
223: {
224: PC_SPAI *ispai = (PC_SPAI *)pc->data;
226: PetscFunctionBegin;
227: ispai->sp = (int)sp;
228: PetscFunctionReturn(PETSC_SUCCESS);
229: }
231: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems PetscOptionsObject)
232: {
233: PC_SPAI *ispai = (PC_SPAI *)pc->data;
234: int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
235: double epsilon1;
236: PetscBool flg;
238: PetscFunctionBegin;
239: PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
240: PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
241: if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
242: PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
243: if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
244: /* added 1/7/99 g.h. */
245: PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
246: if (flg) PetscCall(PCSPAISetMax(pc, max1));
247: PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
248: if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
249: PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
250: if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
251: PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
252: if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
253: PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
254: if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
255: PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
256: if (flg) PetscCall(PCSPAISetSp(pc, sp));
257: PetscOptionsHeadEnd();
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: /*MC
262: PCSPAI - Use the Sparse Approximate Inverse method {cite}`gh97`
   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <true,false> - verbose output
274: Level: beginner
276: Note:
277: This only works with `MATAIJ` matrices.
279: .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
280: `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
281: `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
282: M*/
284: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
285: {
286: PC_SPAI *ispai;
288: PetscFunctionBegin;
289: PetscCall(PetscNew(&ispai));
290: pc->data = ispai;
292: pc->ops->destroy = PCDestroy_SPAI;
293: pc->ops->apply = PCApply_SPAI;
294: pc->ops->matapply = PCMatApply_SPAI;
295: pc->ops->applyrichardson = 0;
296: pc->ops->setup = PCSetUp_SPAI;
297: pc->ops->view = PCView_SPAI;
298: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
300: ispai->epsilon = .4;
301: ispai->nbsteps = 5;
302: ispai->max = 5000;
303: ispai->maxnew = 5;
304: ispai->block_size = 1;
305: ispai->cache_size = 5;
306: ispai->verbose = 0;
308: ispai->sp = 1;
309: PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &ispai->comm_spai));
311: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
312: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
313: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
314: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
315: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
316: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
317: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
318: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /*
323: Converts from a PETSc matrix to an SPAI matrix
324: */
/*
   A  - supplies the row values/structure
   AT - supplies the column structure (pass A itself when the pattern is
        symmetric, or NULL to skip building the column structure)
   B  - output: newly allocated SPAI matrix; caller frees with sp_free_matrix()

   NOTE(review): the malloc() results below are unchecked, matching SPAI's own
   allocation style — confirm whether PETSc error checking is wanted here.
*/
static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix *M;
  int i, j, col;
  int row_indx;
  int len, pe, local_indx, start_indx;
  int *mapping;
  const int *cols;
  const double *vals;
  int n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  /* scalar (block size 1) storage throughout this interface */
  M->n = n;
  M->bs = 1;
  M->max_block_size = 1;

  M->mnls = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe = (int *)malloc(sizeof(int) * n);
  M->block_sizes = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* gather every rank's local row count, then prefix-sum into start indices */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* pe[g] = rank that owns global row g */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  /* second argument asks new_compressed_lines for column-structure storage */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  /* NOTE(review): with bs == 1 this computes mapping[i] == i; the loop looks
     like a leftover from a variable-block version — confirm before removing */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart; /* local row index */
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx] = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len] = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  /* rows of AT are the columns of A; only the sparsity pattern is copied */
  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}
444: /*
445: Converts from an SPAI matrix B to a PETSc matrix PB.
446: This assumes that the SPAI matrix B is stored in
447: COMPRESSED-ROW format.
448: */
/*
   B  - SPAI matrix in compressed-row format (not modified, not freed here)
   PB - output: newly created, assembled PETSc AIJ matrix; caller destroys
*/
static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int m, n, M, N;
  int d_nz, o_nz;
  int *d_nnz, *o_nnz; /* per-row diagonal-block / off-diagonal-block nonzero counts */
  int i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* local sizes come from SPAI's ownership layout so they match B exactly */
  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ */
  /* a column inside [first_diag_col, last_diag_col) lands in the diagonal block */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  /* set both Seq and MPI preallocation; only the one matching the actual
     type takes effect (d_nz/o_nz are ignored because the nnz arrays are given) */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  /* copy the entries; ADD_VALUES is safe since each entry is set exactly once */
  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }
  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}