Actual source code: ihtool.cxx
1: #include <../src/mat/impls/htool/htool.hpp>
2: #include <petscdraw.h>
3: #include <set>
5: const char *const MatHtoolCompressorTypes[] = {"sympartialACA", "fullACA", "SVD"};
6: const char *const MatHtoolClusteringTypes[] = {"PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric"};
7: const char *HtoolCitations[2] = {"@article{marchand2020two,\n"
8: " Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
9: " Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
10: " Year = {2020},\n"
11: " Publisher = {Elsevier},\n"
12: " Journal = {Numerische Mathematik},\n"
13: " Volume = {146},\n"
14: " Pages = {597--628},\n"
15: " Url = {https://github.com/htool-ddm/htool}\n"
16: "}\n",
17: "@article{Marchand2026,\n"
18: " Author = {Marchand, Pierre and Tournier, Pierre-Henri and Jolivet, Pierre},\n"
19: " Title = {{Htool-DDM}: A {C++} library for parallel solvers and compressed linear systems},\n"
20: " Year = {2026},\n"
21: " Publisher = {The Open Journal},\n"
22: " Journal = {Journal of Open Source Software},\n"
23: " Volume = {11},\n"
24: " Number = {118},\n"
25: " Pages = {9279},\n"
26: " Url = {https://doi.org/10.21105/joss.09279}\n"
27: "}\n"};
28: static PetscBool HtoolCite[2] = {PETSC_FALSE, PETSC_FALSE};
30: static PetscErrorCode MatGetDiagonal_Htool(Mat A, Vec v)
31: {
32: Mat_Htool *a;
33: PetscScalar *x;
34: PetscBool flg;
36: PetscFunctionBegin;
37: PetscCall(MatHasCongruentLayouts(A, &flg));
38: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
39: PetscCall(MatShellGetContext(A, &a));
40: PetscCall(VecGetArrayWrite(v, &x));
41: PetscCallExternalVoid("copy_diagonal_in_user_numbering", htool::copy_diagonal_in_user_numbering(*a->block_diagonal_hmatrix, x));
42: PetscCall(VecRestoreArrayWrite(v, &x));
43: PetscFunctionReturn(PETSC_SUCCESS);
44: }
46: static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A, Mat *b)
47: {
48: Mat_Htool *a, *c;
49: Mat B;
50: PetscScalar shift, scale;
51: PetscBool flg;
53: PetscFunctionBegin;
54: PetscCall(MatHasCongruentLayouts(A, &flg));
55: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
56: PetscCall(MatShellGetContext(A, &a));
57: PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
58: if (!B) {
59: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
60: PetscCheck(a->block_diagonal_hmatrix, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Block diagonal htool::HMatrix not found");
61: PetscCall(MatCreate(PETSC_COMM_SELF, &B));
62: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
63: PetscCall(MatSetType(B, MATHTOOL));
64: PetscCall(MatSetUp(B));
65: B->assembled = PETSC_TRUE;
66: B->preallocated = PETSC_TRUE;
67: PetscCall(MatShellGetContext(B, &c));
68: c->dim = a->dim;
69: c->epsilon = a->epsilon;
70: c->eta = a->eta;
71: c->block_tree_consistency = a->block_tree_consistency;
72: c->permutation = a->permutation;
73: c->recompression = a->recompression;
74: c->compressor = a->compressor;
75: c->clustering = a->clustering;
76: c->local_to_local_operator = std::make_unique<htool::LocalToLocalHMatrix<PetscScalar>>(*a->block_diagonal_hmatrix);
77: c->distributed_operator_holder_wo_assembly = std::make_unique<htool::CustomApproximationBuilder<PetscScalar>>(a->block_diagonal_hmatrix->get_target_cluster(), a->block_diagonal_hmatrix->get_source_cluster(), PetscObjectComm((PetscObject)A), *c->local_to_local_operator);
78: c->distributed_operator = &c->distributed_operator_holder_wo_assembly->distributed_operator;
79: c->block_diagonal_hmatrix = a->block_diagonal_hmatrix;
80: c->local_hmatrix = a->block_diagonal_hmatrix;
81: PetscCall(MatPropagateSymmetryOptions(A, B));
82: PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
83: *b = B;
84: PetscCall(MatDestroy(&B));
85: PetscCall(MatShift(*b, shift));
86: PetscCall(MatScale(*b, scale));
87: } else {
88: PetscCall(MatShellGetScalingShifts(A, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
89: *b = B;
90: }
91: PetscFunctionReturn(PETSC_SUCCESS);
92: }
94: static PetscErrorCode MatMult_Htool(Mat A, Vec x, Vec y)
95: {
96: Mat_Htool *a;
97: const PetscScalar *in;
98: PetscScalar *out;
100: PetscFunctionBegin;
101: PetscCall(MatShellGetContext(A, &a));
102: PetscCall(VecGetArrayRead(x, &in));
103: PetscCall(VecGetArrayWrite(y, &out));
104: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
105: else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
106: PetscCall(VecRestoreArrayRead(x, &in));
107: PetscCall(VecRestoreArrayWrite(y, &out));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: static PetscErrorCode MatMultTranspose_Htool(Mat A, Vec x, Vec y)
112: {
113: Mat_Htool *a;
114: const PetscScalar *in;
115: PetscScalar *out;
117: PetscFunctionBegin;
118: PetscCall(MatShellGetContext(A, &a));
119: PetscCall(VecGetArrayRead(x, &in));
120: PetscCall(VecGetArrayWrite(y, &out));
121: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
122: else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
123: PetscCall(VecRestoreArrayRead(x, &in));
124: PetscCall(VecRestoreArrayWrite(y, &out));
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: static PetscErrorCode MatIncreaseOverlap_Htool(Mat A, PetscInt is_max, IS is[], PetscInt ov)
129: {
130: std::set<PetscInt> set;
131: const PetscInt *idx;
132: PetscInt *oidx, size, bs[2];
133: PetscMPIInt csize;
135: PetscFunctionBegin;
136: PetscCall(MatGetBlockSizes(A, bs, bs + 1));
137: if (bs[0] != bs[1]) bs[0] = 1;
138: for (PetscInt i = 0; i < is_max; ++i) {
139: /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
140: /* needed to avoid subdomain matrices to replicate A since it is dense */
141: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]), &csize));
142: PetscCheck(csize == 1, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel IS");
143: PetscCall(ISGetSize(is[i], &size));
144: PetscCall(ISGetIndices(is[i], &idx));
145: for (PetscInt j = 0; j < size; ++j) {
146: set.insert(idx[j]);
147: for (PetscInt k = 1; k <= ov; ++k) { /* for each layer of overlap */
148: if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
149: if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
150: }
151: }
152: PetscCall(ISRestoreIndices(is[i], &idx));
153: PetscCall(ISDestroy(is + i));
154: if (bs[0] > 1) {
155: for (std::set<PetscInt>::iterator it = set.cbegin(); it != set.cend(); it++) {
156: std::vector<PetscInt> block(bs[0]);
157: std::iota(block.begin(), block.end(), (*it / bs[0]) * bs[0]);
158: set.insert(block.cbegin(), block.cend());
159: }
160: }
161: size = set.size(); /* size with overlap */
162: PetscCall(PetscMalloc1(size, &oidx));
163: for (const PetscInt j : set) *oidx++ = j;
164: oidx -= size;
165: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, oidx, PETSC_OWN_POINTER, is + i));
166: }
167: PetscFunctionReturn(PETSC_SUCCESS);
168: }
170: static PetscErrorCode MatCreateSubMatrices_Htool(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
171: {
172: Mat_Htool *a;
173: Mat D, B, BT;
174: const PetscScalar *copy;
175: PetscScalar *ptr, shift, scale;
176: const PetscInt *idxr, *idxc, *it;
177: PetscInt nrow, m, i;
178: PetscBool flg;
180: PetscFunctionBegin;
181: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
182: PetscCall(MatShellGetContext(A, &a));
183: if (scall != MAT_REUSE_MATRIX) PetscCall(PetscCalloc1(n, submat));
184: for (i = 0; i < n; ++i) {
185: PetscCall(ISGetLocalSize(irow[i], &nrow));
186: PetscCall(ISGetLocalSize(icol[i], &m));
187: PetscCall(ISGetIndices(irow[i], &idxr));
188: PetscCall(ISGetIndices(icol[i], &idxc));
189: if (scall != MAT_REUSE_MATRIX) PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow, m, nrow, m, nullptr, (*submat) + i));
190: PetscCall(MatDenseGetArrayWrite((*submat)[i], &ptr));
191: if (irow[i] == icol[i]) { /* same row and column IS? */
192: PetscCall(MatHasCongruentLayouts(A, &flg));
193: if (flg) {
194: PetscCall(ISSorted(irow[i], &flg));
195: if (flg) { /* sorted IS? */
196: it = std::lower_bound(idxr, idxr + nrow, A->rmap->rstart);
197: if (it != idxr + nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
198: if (std::distance(idxr, it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
199: for (PetscInt j = 0; j < A->rmap->n && flg; ++j)
200: if (PetscUnlikely(it[j] != A->rmap->rstart + j)) flg = PETSC_FALSE;
201: if (flg) { /* complete local diagonal block in IS? */
202: Mat dense;
204: /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
205: * [ B C E ]
206: * A = [ B D E ]
207: * [ B F E ]
208: */
209: m = std::distance(idxr, it); /* shift of the coefficient (0,0) of block D from above */
210: PetscCall(MatGetDiagonalBlock(A, &D));
211: PetscCall(MatConvert(D, MATDENSE, MAT_INITIAL_MATRIX, &dense));
212: PetscCall(MatDenseGetArrayRead(dense, ©));
213: for (PetscInt k = 0; k < A->rmap->n; ++k) PetscCall(PetscArraycpy(ptr + (m + k) * nrow + m, copy + k * A->rmap->n, A->rmap->n)); /* block D from above */
214: PetscCall(MatDenseRestoreArrayRead(dense, ©));
215: PetscCall(MatDestroy(&dense));
216: if (m) {
217: a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* vertical block B from above */
218: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
219: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
220: PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, m, A->rmap->n, m, ptr + m, &B));
221: PetscCall(MatDenseSetLDA(B, nrow));
222: PetscCall(MatCreateDense(PETSC_COMM_SELF, m, A->rmap->n, m, A->rmap->n, ptr + m * nrow, &BT));
223: PetscCall(MatDenseSetLDA(BT, nrow));
224: if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
225: PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
226: } else {
227: PetscCall(MatTransposeSetPrecursor(B, BT));
228: PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
229: }
230: PetscCall(MatDestroy(&B));
231: PetscCall(MatDestroy(&BT));
232: } else {
233: for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block C from above */
234: a->wrapper->copy_submatrix(m, 1, idxr, idxc + m + k, ptr + (m + k) * nrow);
235: }
236: }
237: }
238: if (m + A->rmap->n != nrow) {
239: a->wrapper->copy_submatrix(nrow, std::distance(it + A->rmap->n, idxr + nrow), idxr, idxc + m + A->rmap->n, ptr + (m + A->rmap->n) * nrow); /* vertical block E from above */
240: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
241: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
242: PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), ptr + (m + A->rmap->n) * nrow + m, &B));
243: PetscCall(MatDenseSetLDA(B, nrow));
244: PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, ptr + m * nrow + m + A->rmap->n, &BT));
245: PetscCall(MatDenseSetLDA(BT, nrow));
246: if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
247: PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
248: } else {
249: PetscCall(MatTransposeSetPrecursor(B, BT));
250: PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
251: }
252: PetscCall(MatDestroy(&B));
253: PetscCall(MatDestroy(&BT));
254: } else {
255: for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block F from above */
256: a->wrapper->copy_submatrix(std::distance(it + A->rmap->n, idxr + nrow), 1, it + A->rmap->n, idxc + m + k, ptr + (m + k) * nrow + m + A->rmap->n);
257: }
258: }
259: }
260: } /* complete local diagonal block not in IS */
261: } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
262: } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
263: } /* unsorted IS */
264: }
265: } else flg = PETSC_FALSE; /* different row and column IS */
266: if (!flg) a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* reassemble everything */
267: PetscCall(ISRestoreIndices(irow[i], &idxr));
268: PetscCall(ISRestoreIndices(icol[i], &idxc));
269: PetscCall(MatDenseRestoreArrayWrite((*submat)[i], &ptr));
270: PetscCall(MatShift((*submat)[i], shift));
271: PetscCall(MatScale((*submat)[i], scale));
272: }
273: PetscFunctionReturn(PETSC_SUCCESS);
274: }
276: static PetscErrorCode MatDestroy_Htool(Mat A)
277: {
278: Mat_Htool *a;
279: PetscContainer container;
280: MatHtoolKernelTranspose *kernelt;
282: PetscFunctionBegin;
283: PetscCall(MatShellGetContext(A, &a));
284: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", nullptr));
285: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", nullptr));
286: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", nullptr));
287: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", nullptr));
288: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", nullptr));
289: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", nullptr));
290: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", nullptr));
291: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", nullptr));
292: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", nullptr));
293: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", nullptr));
294: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", nullptr));
295: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", nullptr));
296: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", nullptr));
297: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", nullptr));
298: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", nullptr));
299: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", nullptr));
300: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", nullptr));
301: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", nullptr));
302: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", nullptr));
303: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", nullptr));
304: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", nullptr));
305: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", nullptr));
306: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", nullptr));
307: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", nullptr));
308: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", nullptr));
309: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", nullptr));
310: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", nullptr));
311: PetscCall(PetscObjectQuery((PetscObject)A, "KernelTranspose", (PetscObject *)&container));
312: if (container) { /* created in MatTranspose_Htool() */
313: PetscCall(PetscContainerGetPointer(container, &kernelt));
314: PetscCall(MatDestroy(&kernelt->A));
315: PetscCall(PetscObjectCompose((PetscObject)A, "KernelTranspose", nullptr));
316: }
317: if (a->gcoords_source != a->gcoords_target) PetscCall(PetscFree(a->gcoords_source));
318: PetscCall(PetscFree(a->gcoords_target));
319: delete a->wrapper;
320: a->target_cluster.reset();
321: a->source_cluster.reset();
322: a->distributed_operator_holder_w_assembly.reset();
323: a->local_to_local_operator.reset();
324: a->distributed_operator_holder_wo_assembly.reset();
325: PetscCall(PetscFree(a));
326: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", nullptr)); // needed to avoid a call to MatShellSetContext_Immutable()
327: PetscFunctionReturn(PETSC_SUCCESS);
328: }
330: static PetscErrorCode MatView_Htool_Draw_Zoom(PetscDraw draw, void *ptr)
331: {
332: Mat A = (Mat)ptr;
333: Mat_Htool *a;
334: PetscReal x_r, y_r, x_l, y_l, w, h;
335: PetscInt min_max[2] = {PETSC_INT_MAX, 0};
336: const int greens[] = {PETSC_DRAW_LIMEGREEN, PETSC_DRAW_FORESTGREEN, PETSC_DRAW_DARKGREEN};
337: int color;
338: char str[16];
339: std::vector<const htool::HMatrix<PetscScalar, PetscReal> *> dense_blocks, low_rank_blocks;
341: PetscFunctionBegin;
342: PetscCall(MatShellGetContext(A, &a));
343: PetscCallExternalVoid("get_leaves", htool::get_leaves(*a->local_hmatrix, dense_blocks, low_rank_blocks));
344: for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
345: const PetscInt rank = block->get_rank();
347: if (rank < min_max[0]) min_max[0] = rank;
348: if (rank > min_max[1]) min_max[1] = rank;
349: }
350: PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)A), min_max, min_max));
351: if (min_max[0] == PETSC_INT_MAX) min_max[0] = min_max[1];
352: PetscCall(PetscDrawStringGetSize(draw, &w, &h));
353: PetscDrawCollectiveBegin(draw);
354: for (const htool::HMatrix<PetscScalar, PetscReal> *block : dense_blocks) {
355: x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
356: x_r += (PetscReal)block->get_source_cluster().get_size();
357: y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
358: y_l -= (PetscReal)block->get_target_cluster().get_size();
359: PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED));
360: PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
361: PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
362: PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
363: PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
364: }
365: for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
366: PetscReal th;
367: const PetscInt rank = block->get_rank();
369: x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
370: x_r += (PetscReal)block->get_source_cluster().get_size();
371: y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
372: y_l -= (PetscReal)block->get_target_cluster().get_size();
373: if (min_max[1] > min_max[0]) color = greens[(int)((PetscReal)(rank - min_max[0]) / (PetscReal)(min_max[1] - min_max[0]) * (PETSC_STATIC_ARRAY_LENGTH(greens) - 1) + 0.5)];
374: else color = greens[PETSC_STATIC_ARRAY_LENGTH(greens) - 1];
375: PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
376: PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
377: PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
378: PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
379: PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
380: PetscCall(PetscSNPrintf(str, sizeof(str), "%d", rank));
381: PetscCall(PetscDrawStringGetSize(draw, nullptr, &th));
382: if (x_r - x_l > 4 * w && y_r - y_l > 4 * h) PetscCall(PetscDrawStringCentered(draw, 0.5 * (x_l + x_r), 0.5 * (y_l + y_r) - th / 2, PETSC_DRAW_BLACK, str));
383: }
384: PetscDrawCollectiveEnd(draw);
385: PetscFunctionReturn(PETSC_SUCCESS);
386: }
388: static PetscErrorCode MatView_Htool_Draw(Mat A, PetscViewer viewer)
389: {
390: PetscDraw draw;
391: PetscReal x_r = (PetscReal)A->cmap->N, y_r = (PetscReal)A->rmap->N, w, h;
392: PetscBool flg;
394: PetscFunctionBegin;
395: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
396: PetscCall(PetscDrawIsNull(draw, &flg));
397: if (flg) PetscFunctionReturn(PETSC_SUCCESS);
398: w = x_r / 10.0;
399: h = y_r / 10.0;
400: PetscCall(PetscDrawSetCoordinates(draw, -w, -h, x_r + w, y_r + h));
401: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
402: PetscCall(PetscDrawZoom(draw, MatView_Htool_Draw_Zoom, A));
403: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", nullptr));
404: PetscCall(PetscDrawSave(draw));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: static PetscErrorCode MatView_Htool(Mat A, PetscViewer pv)
409: {
410: Mat_Htool *a;
411: PetscScalar shift, scale;
412: PetscBool flg;
413: std::map<std::string, std::string> hmatrix_information;
415: PetscFunctionBegin;
416: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERDRAW, &flg));
417: if (flg) PetscCall(MatView_Htool_Draw(A, pv));
418: else {
419: PetscCall(MatShellGetContext(A, &a));
420: hmatrix_information = htool::get_distributed_hmatrix_information(*a->local_hmatrix, PetscObjectComm((PetscObject)A));
421: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &flg));
422: if (flg) {
423: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
424: PetscCall(PetscViewerASCIIPrintf(pv, "symmetry: %c\n", a->block_diagonal_hmatrix->get_symmetry()));
425: if (PetscAbsScalar(scale - 1.0) > PETSC_MACHINE_EPSILON) {
426: #if defined(PETSC_USE_COMPLEX)
427: PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g+%gi\n", (double)PetscRealPart(scale), (double)PetscImaginaryPart(scale)));
428: #else
429: PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g\n", (double)scale));
430: #endif
431: }
432: if (PetscAbsScalar(shift) > PETSC_MACHINE_EPSILON) {
433: #if defined(PETSC_USE_COMPLEX)
434: PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g+%gi\n", (double)PetscRealPart(shift), (double)PetscImaginaryPart(shift)));
435: #else
436: PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g\n", (double)shift));
437: #endif
438: }
439: PetscCall(PetscViewerASCIIPrintf(pv, "maximal cluster leaf size: %" PetscInt_FMT "\n", a->max_cluster_leaf_size));
440: PetscCall(PetscViewerASCIIPrintf(pv, "epsilon: %g\n", (double)a->epsilon));
441: PetscCall(PetscViewerASCIIPrintf(pv, "eta: %g\n", (double)a->eta));
442: PetscCall(PetscViewerASCIIPrintf(pv, "minimum target depth: %" PetscInt_FMT "\n", a->depth[0]));
443: PetscCall(PetscViewerASCIIPrintf(pv, "minimum source depth: %" PetscInt_FMT "\n", a->depth[1]));
444: PetscCall(PetscViewerASCIIPrintf(pv, "compressor: %s\n", MatHtoolCompressorTypes[a->compressor]));
445: PetscCall(PetscViewerASCIIPrintf(pv, "clustering: %s\n", MatHtoolClusteringTypes[a->clustering]));
446: PetscCall(PetscViewerASCIIPrintf(pv, "compression ratio: %s\n", hmatrix_information["Compression_ratio"].c_str()));
447: PetscCall(PetscViewerASCIIPrintf(pv, "space saving: %s\n", hmatrix_information["Space_saving"].c_str()));
448: PetscCall(PetscViewerASCIIPrintf(pv, "block tree consistency: %s\n", PetscBools[a->local_hmatrix->is_block_tree_consistent()]));
449: PetscCall(PetscViewerASCIIPrintf(pv, "recompression: %s\n", PetscBools[a->recompression]));
450: PetscCall(PetscViewerASCIIPrintf(pv, "number of dense (resp. low rank) matrices: %s (resp. %s)\n", hmatrix_information["Number_of_dense_blocks"].c_str(), hmatrix_information["Number_of_low_rank_blocks"].c_str()));
451: PetscCall(
452: PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n", hmatrix_information["Dense_block_size_min"].c_str(), hmatrix_information["Dense_block_size_mean"].c_str(), hmatrix_information["Dense_block_size_max"].c_str()));
453: PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n", hmatrix_information["Low_rank_block_size_min"].c_str(), hmatrix_information["Low_rank_block_size_mean"].c_str(),
454: hmatrix_information["Low_rank_block_size_max"].c_str()));
455: PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) ranks: (%s, %s, %s)\n", hmatrix_information["Rank_min"].c_str(), hmatrix_information["Rank_mean"].c_str(), hmatrix_information["Rank_max"].c_str()));
456: }
457: }
458: PetscFunctionReturn(PETSC_SUCCESS);
459: }
461: /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
462: static PetscErrorCode MatGetRow_Htool(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
463: {
464: Mat_Htool *a;
465: PetscScalar shift, scale;
466: PetscInt *idxc;
467: PetscBLASInt one = 1, bn;
469: PetscFunctionBegin;
470: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
471: PetscCall(MatShellGetContext(A, &a));
472: if (nz) *nz = A->cmap->N;
473: if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
474: PetscCall(PetscMalloc1(A->cmap->N, &idxc));
475: for (PetscInt i = 0; i < A->cmap->N; ++i) idxc[i] = i;
476: }
477: if (idx) *idx = idxc;
478: if (v) {
479: PetscCall(PetscMalloc1(A->cmap->N, v));
480: if (a->wrapper) a->wrapper->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
481: else reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx)->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
482: PetscCall(PetscBLASIntCast(A->cmap->N, &bn));
483: PetscCallCXX(htool::Blas<PetscScalar>::scal(&bn, &scale, *v, &one));
484: if (row < A->cmap->N) (*v)[row] += shift;
485: }
486: if (!idx) PetscCall(PetscFree(idxc));
487: PetscFunctionReturn(PETSC_SUCCESS);
488: }
490: static PetscErrorCode MatRestoreRow_Htool(Mat, PetscInt, PetscInt *, PetscInt **idx, PetscScalar **v)
491: {
492: PetscFunctionBegin;
493: if (idx) PetscCall(PetscFree(*idx));
494: if (v) PetscCall(PetscFree(*v));
495: PetscFunctionReturn(PETSC_SUCCESS);
496: }
498: static PetscErrorCode MatSetFromOptions_Htool(Mat A, PetscOptionItems PetscOptionsObject)
499: {
500: Mat_Htool *a;
501: PetscInt n;
502: PetscBool flg;
504: PetscFunctionBegin;
505: PetscCall(MatShellGetContext(A, &a));
506: PetscOptionsHeadBegin(PetscOptionsObject, "Htool options");
507: PetscCall(PetscOptionsBoundedInt("-mat_htool_max_cluster_leaf_size", "Maximal leaf size in cluster tree", nullptr, a->max_cluster_leaf_size, &a->max_cluster_leaf_size, nullptr, 0));
508: PetscCall(PetscOptionsBoundedReal("-mat_htool_epsilon", "Relative error in Frobenius norm when approximating a block", nullptr, a->epsilon, &a->epsilon, nullptr, 0.0));
509: PetscCall(PetscOptionsReal("-mat_htool_eta", "Admissibility condition tolerance", nullptr, a->eta, &a->eta, nullptr));
510: PetscCall(PetscOptionsBoundedInt("-mat_htool_min_target_depth", "Minimal cluster tree depth associated with the rows", nullptr, a->depth[0], a->depth, nullptr, 0));
511: PetscCall(PetscOptionsBoundedInt("-mat_htool_min_source_depth", "Minimal cluster tree depth associated with the columns", nullptr, a->depth[1], a->depth + 1, nullptr, 0));
512: PetscCall(PetscOptionsBool("-mat_htool_block_tree_consistency", "Block tree consistency", nullptr, a->block_tree_consistency, &a->block_tree_consistency, nullptr));
513: PetscCall(PetscOptionsBool("-mat_htool_recompression", "Use recompression", nullptr, a->recompression, &a->recompression, nullptr));
515: n = 0;
516: PetscCall(PetscOptionsEList("-mat_htool_compressor", "Type of compression", "MatHtoolCompressorType", MatHtoolCompressorTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes), MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA], &n, &flg));
517: if (flg) a->compressor = MatHtoolCompressorType(n);
518: n = 0;
519: PetscCall(PetscOptionsEList("-mat_htool_clustering", "Type of clustering", "MatHtoolClusteringType", MatHtoolClusteringTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes), MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR], &n, &flg));
520: if (flg) a->clustering = MatHtoolClusteringType(n);
521: PetscOptionsHeadEnd();
522: PetscFunctionReturn(PETSC_SUCCESS);
523: }
525: static PetscErrorCode MatAssemblyEnd_Htool(Mat A, MatAssemblyType)
526: {
527: Mat_Htool *a;
528: const PetscInt *ranges;
529: PetscInt *offset;
530: PetscMPIInt size, rank;
531: char S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'), uplo = S == 'N' ? 'N' : 'U';
532: htool::VirtualGenerator<PetscScalar> *generator = nullptr;
533: htool::ClusterTreeBuilder<PetscReal> recursive_build_strategy;
534: htool::Cluster<PetscReal> *source_cluster;
535: std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> compressor;
537: PetscFunctionBegin;
538: for (size_t i = 0; i < PETSC_STATIC_ARRAY_LENGTH(HtoolCite); ++i) PetscCall(PetscCitationsRegister(HtoolCitations[i], HtoolCite + i));
539: PetscCall(MatShellGetContext(A, &a));
540: if (a->distributed_operator_holder_wo_assembly) PetscFunctionReturn(PETSC_SUCCESS);
541: delete a->wrapper;
542: a->target_cluster.reset();
543: a->source_cluster.reset();
544: a->distributed_operator_holder_w_assembly.reset();
545: a->local_to_local_operator.reset();
546: a->distributed_operator_holder_wo_assembly.reset();
547: // clustering
548: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
549: PetscCall(PetscMalloc1(2 * size, &offset));
550: PetscCall(MatGetOwnershipRanges(A, &ranges));
551: for (PetscInt i = 0; i < size; ++i) {
552: offset[2 * i] = ranges[i];
553: offset[2 * i + 1] = ranges[i + 1] - ranges[i];
554: }
555: switch (a->clustering) {
556: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
557: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
558: break;
559: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
560: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
561: break;
562: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
563: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
564: break;
565: default:
566: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
567: }
568: recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
569: a->target_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->rmap->N, a->dim, a->gcoords_target, 2, size, offset));
570: if (a->gcoords_target != a->gcoords_source) {
571: PetscCall(MatGetOwnershipRangesColumn(A, &ranges));
572: for (PetscInt i = 0; i < size; ++i) {
573: offset[2 * i] = ranges[i];
574: offset[2 * i + 1] = ranges[i + 1] - ranges[i];
575: }
576: switch (a->clustering) {
577: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
578: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
579: break;
580: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
581: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
582: break;
583: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
584: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
585: break;
586: default:
587: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
588: }
589: recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
590: a->source_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->cmap->N, a->dim, a->gcoords_source, 2, size, offset));
591: S = uplo = 'N';
592: source_cluster = a->source_cluster.get();
593: } else source_cluster = a->target_cluster.get();
594: PetscCall(PetscFree(offset));
595: // generator
596: if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
597: else {
598: a->wrapper = nullptr;
599: generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx);
600: }
601: // compressor
602: switch (a->compressor) {
603: case MAT_HTOOL_COMPRESSOR_FULL_ACA:
604: compressor = std::make_shared<htool::fullACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
605: break;
606: case MAT_HTOOL_COMPRESSOR_SVD:
607: compressor = std::make_shared<htool::SVD<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
608: break;
609: default:
610: compressor = std::make_shared<htool::sympartialACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
611: }
612: // local hierarchical matrix
613: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
614: auto hmatrix_builder = htool::HMatrixTreeBuilder<PetscScalar>(a->epsilon, a->eta, S, uplo);
615: if (a->recompression) {
616: std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> RecompressedLowRankGenerator = std::make_shared<htool::RecompressedLowRankGenerator<PetscScalar>>(*compressor, std::function<void(htool::LowRankMatrix<PetscScalar> &)>(htool::SVD_recompression<PetscScalar>));
617: hmatrix_builder.set_low_rank_generator(RecompressedLowRankGenerator);
618: } else {
619: hmatrix_builder.set_low_rank_generator(compressor);
620: }
621: hmatrix_builder.set_minimal_target_depth(a->depth[0]);
622: hmatrix_builder.set_minimal_source_depth(a->depth[1]);
623: PetscCheck(a->block_tree_consistency || (!a->block_tree_consistency && !(A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE)), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot have a MatHtool with inconsistent block tree which is either symmetric or Hermitian");
624: hmatrix_builder.set_block_tree_consistency(a->block_tree_consistency);
625: a->distributed_operator_holder_w_assembly = std::make_unique<htool::DefaultApproximationBuilder<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, *a->target_cluster, *source_cluster, hmatrix_builder, PetscObjectComm((PetscObject)A));
626: a->distributed_operator = &a->distributed_operator_holder_w_assembly->distributed_operator;
627: a->block_diagonal_hmatrix = a->distributed_operator_holder_w_assembly->block_diagonal_hmatrix;
628: a->local_hmatrix = &a->distributed_operator_holder_w_assembly->hmatrix;
629: PetscFunctionReturn(PETSC_SUCCESS);
630: }
632: static PetscErrorCode MatProductNumeric_Htool(Mat C)
633: {
634: Mat_Product *product = C->product;
635: Mat_Htool *a;
636: const PetscScalar *in;
637: PetscScalar *out;
638: PetscInt N, lda;
640: PetscFunctionBegin;
641: MatCheckProduct(C, 1);
642: PetscCall(MatGetSize(C, nullptr, &N));
643: PetscCall(MatDenseGetLDA(C, &lda));
644: PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
645: PetscCall(MatDenseGetArrayRead(product->B, &in));
646: PetscCall(MatDenseGetArrayWrite(C, &out));
647: PetscCall(MatShellGetContext(product->A, &a));
648: switch (product->type) {
649: case MATPRODUCT_AB:
650: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
651: else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
652: break;
653: case MATPRODUCT_AtB:
654: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
655: else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
656: break;
657: default:
658: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductType %s is not supported", MatProductTypes[product->type]);
659: }
660: PetscCall(MatDenseRestoreArrayWrite(C, &out));
661: PetscCall(MatDenseRestoreArrayRead(product->B, &in));
662: PetscFunctionReturn(PETSC_SUCCESS);
663: }
665: static PetscErrorCode MatProductSymbolic_Htool(Mat C)
666: {
667: Mat_Product *product = C->product;
668: Mat A, B;
669: PetscBool flg;
671: PetscFunctionBegin;
672: MatCheckProduct(C, 1);
673: A = product->A;
674: B = product->B;
675: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, ""));
676: PetscCheck(flg && (product->type == MATPRODUCT_AB || product->type == MATPRODUCT_AtB), PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "ProductType %s not supported for %s", MatProductTypes[product->type], ((PetscObject)product->B)->type_name);
677: if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
678: if (product->type == MATPRODUCT_AB) PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
679: else PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, A->cmap->N, B->cmap->N));
680: }
681: PetscCall(MatSetType(C, MATDENSE));
682: PetscCall(MatSetUp(C));
683: PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
684: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
685: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
686: C->ops->productsymbolic = nullptr;
687: C->ops->productnumeric = MatProductNumeric_Htool;
688: PetscFunctionReturn(PETSC_SUCCESS);
689: }
691: static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
692: {
693: PetscFunctionBegin;
694: MatCheckProduct(C, 1);
695: if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
696: PetscFunctionReturn(PETSC_SUCCESS);
697: }
699: static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A, void *distributed_operator)
700: {
701: Mat_Htool *a;
703: PetscFunctionBegin;
704: PetscCall(MatShellGetContext(A, &a));
705: *(const void **)distributed_operator = static_cast<const void *>(a->distributed_operator);
706: PetscFunctionReturn(PETSC_SUCCESS);
707: }
709: static PetscErrorCode MatHtoolSetKernel_Htool(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
710: {
711: Mat_Htool *a;
713: PetscFunctionBegin;
714: PetscCall(MatShellGetContext(A, &a));
715: a->kernel = kernel;
716: a->kernelctx = kernelctx;
717: delete a->wrapper;
718: if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A, IS *is)
723: {
724: Mat_Htool *a;
725: PetscMPIInt rank;
726: const std::vector<PetscInt> *source;
727: const htool::Cluster<PetscReal> *local_source_cluster;
729: PetscFunctionBegin;
730: PetscCall(MatShellGetContext(A, &a));
731: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
732: local_source_cluster = a->source_cluster ? &a->source_cluster->get_cluster_on_partition(rank) : &a->target_cluster->get_cluster_on_partition(rank);
733: source = &local_source_cluster->get_permutation();
734: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), local_source_cluster->get_size(), source->data() + local_source_cluster->get_offset(), PETSC_COPY_VALUES, is));
735: PetscCall(ISSetPermutation(*is));
736: PetscFunctionReturn(PETSC_SUCCESS);
737: }
739: static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A, IS *is)
740: {
741: Mat_Htool *a;
742: const std::vector<PetscInt> *target;
743: PetscMPIInt rank;
745: PetscFunctionBegin;
746: PetscCall(MatShellGetContext(A, &a));
747: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
748: target = &a->target_cluster->get_permutation();
749: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), a->target_cluster->get_cluster_on_partition(rank).get_size(), target->data() + a->target_cluster->get_cluster_on_partition(rank).get_offset(), PETSC_COPY_VALUES, is));
750: PetscCall(ISSetPermutation(*is));
751: PetscFunctionReturn(PETSC_SUCCESS);
752: }
754: static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A, PetscBool use)
755: {
756: Mat_Htool *a;
758: PetscFunctionBegin;
759: PetscCall(MatShellGetContext(A, &a));
760: a->permutation = use;
761: PetscFunctionReturn(PETSC_SUCCESS);
762: }
764: static PetscErrorCode MatHtoolUseRecompression_Htool(Mat A, PetscBool use)
765: {
766: Mat_Htool *a;
768: PetscFunctionBegin;
769: PetscCall(MatShellGetContext(A, &a));
770: a->recompression = use;
771: PetscFunctionReturn(PETSC_SUCCESS);
772: }
774: #define PETSC_HTOOL_PARAMETER(Type, Name, member) \
775: static PetscErrorCode MatHtoolGet##Name##_Htool(Mat A, Type *v) \
776: { \
777: Mat_Htool *a; \
778: PetscFunctionBegin; \
779: PetscCall(MatShellGetContext(A, &a)); \
780: *v = a->member; \
781: PetscFunctionReturn(PETSC_SUCCESS); \
782: } \
783: static PetscErrorCode MatHtoolSet##Name##_Htool(Mat A, Type v) \
784: { \
785: Mat_Htool *a; \
786: PetscFunctionBegin; \
787: PetscCall(MatShellGetContext(A, &a)); \
788: a->member = v; \
789: PetscFunctionReturn(PETSC_SUCCESS); \
790: }
792: PETSC_HTOOL_PARAMETER(PetscReal, Epsilon, epsilon)
793: PETSC_HTOOL_PARAMETER(PetscReal, Eta, eta)
794: PETSC_HTOOL_PARAMETER(PetscInt, MaxClusterLeafSize, max_cluster_leaf_size)
795: PETSC_HTOOL_PARAMETER(PetscInt, MinTargetDepth, depth[0])
796: PETSC_HTOOL_PARAMETER(PetscInt, MinSourceDepth, depth[1])
797: PETSC_HTOOL_PARAMETER(PetscBool, BlockTreeConsistency, block_tree_consistency)
798: PETSC_HTOOL_PARAMETER(MatHtoolCompressorType, CompressorType, compressor)
799: PETSC_HTOOL_PARAMETER(MatHtoolClusteringType, ClusteringType, clustering)
801: static PetscErrorCode MatConvert_Htool_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
802: {
803: Mat C;
804: Mat_Htool *a;
805: PetscScalar *array, shift, scale;
806: PetscInt lda;
808: PetscFunctionBegin;
809: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
810: PetscCall(MatShellGetContext(A, &a));
811: if (reuse == MAT_REUSE_MATRIX) {
812: C = *B;
813: PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible dimensions");
814: PetscCall(MatDenseGetLDA(C, &lda));
815: PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
816: } else {
817: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
818: PetscCall(MatSetSizes(C, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
819: PetscCall(MatSetType(C, MATDENSE));
820: PetscCall(MatSetUp(C));
821: PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
822: }
823: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
824: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
825: PetscCall(MatDenseGetArrayWrite(C, &array));
826: PetscCallExternalVoid("copy_to_dense_in_user_numbering", htool::copy_to_dense_in_user_numbering(*a->local_hmatrix, array));
827: PetscCall(MatDenseRestoreArrayWrite(C, &array));
828: PetscCall(MatShift(C, shift));
829: PetscCall(MatScale(C, scale));
830: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
831: else *B = C;
832: PetscFunctionReturn(PETSC_SUCCESS);
833: }
835: static PetscErrorCode GenEntriesTranspose(PetscInt sdim, PetscInt M, PetscInt N, const PetscInt *rows, const PetscInt *cols, PetscScalar *ptr, PetscCtx ctx)
836: {
837: MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose *)ctx;
838: PetscScalar *tmp;
840: PetscFunctionBegin;
841: PetscCall(generator->kernel(sdim, N, M, cols, rows, ptr, generator->kernelctx));
842: PetscCall(PetscMalloc1(M * N, &tmp));
843: PetscCall(PetscArraycpy(tmp, ptr, M * N));
844: for (PetscInt i = 0; i < M; ++i) {
845: for (PetscInt j = 0; j < N; ++j) ptr[i + j * M] = tmp[j + i * N];
846: }
847: PetscCall(PetscFree(tmp));
848: PetscFunctionReturn(PETSC_SUCCESS);
849: }
851: /* naive implementation which keeps a reference to the original Mat */
852: static PetscErrorCode MatTranspose_Htool(Mat A, MatReuse reuse, Mat *B)
853: {
854: Mat C;
855: Mat_Htool *a, *c;
856: PetscScalar shift, scale;
857: PetscInt M = A->rmap->N, N = A->cmap->N, m = A->rmap->n, n = A->cmap->n;
858: PetscContainer container;
859: MatHtoolKernelTranspose *kernelt;
861: PetscFunctionBegin;
862: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
863: PetscCall(MatShellGetContext(A, &a));
864: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
865: PetscCheck(reuse != MAT_INPLACE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatTranspose() with MAT_INPLACE_MATRIX not supported");
866: if (reuse == MAT_INITIAL_MATRIX) {
867: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
868: PetscCall(MatSetSizes(C, n, m, N, M));
869: PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
870: PetscCall(MatSetUp(C));
871: PetscCall(PetscNew(&kernelt));
872: PetscCall(PetscObjectContainerCompose((PetscObject)C, "KernelTranspose", kernelt, PetscCtxDestroyDefault));
873: } else {
874: C = *B;
875: PetscCall(PetscObjectQuery((PetscObject)C, "KernelTranspose", (PetscObject *)&container));
876: PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatTranspose() with MAT_INITIAL_MATRIX first");
877: PetscCall(PetscContainerGetPointer(container, &kernelt));
878: }
879: PetscCall(MatShellGetContext(C, &c));
880: c->dim = a->dim;
881: PetscCall(MatShift(C, shift));
882: PetscCall(MatScale(C, scale));
883: c->kernel = GenEntriesTranspose;
884: if (kernelt->A != A) {
885: PetscCall(MatDestroy(&kernelt->A));
886: kernelt->A = A;
887: PetscCall(PetscObjectReference((PetscObject)A));
888: }
889: kernelt->kernel = a->kernel;
890: kernelt->kernelctx = a->kernelctx;
891: c->kernelctx = kernelt;
892: c->max_cluster_leaf_size = a->max_cluster_leaf_size;
893: c->epsilon = a->epsilon;
894: c->eta = a->eta;
895: c->block_tree_consistency = a->block_tree_consistency;
896: c->permutation = a->permutation;
897: c->recompression = a->recompression;
898: c->compressor = a->compressor;
899: c->clustering = a->clustering;
900: if (reuse == MAT_INITIAL_MATRIX) {
901: PetscCall(PetscMalloc1(N * c->dim, &c->gcoords_target));
902: PetscCall(PetscArraycpy(c->gcoords_target, a->gcoords_source, N * c->dim));
903: if (a->gcoords_target != a->gcoords_source) {
904: PetscCall(PetscMalloc1(M * c->dim, &c->gcoords_source));
905: PetscCall(PetscArraycpy(c->gcoords_source, a->gcoords_target, M * c->dim));
906: } else c->gcoords_source = c->gcoords_target;
907: }
908: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
909: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
910: if (reuse == MAT_INITIAL_MATRIX) *B = C;
911: PetscFunctionReturn(PETSC_SUCCESS);
912: }
914: static PetscErrorCode MatDestroy_Factor(Mat F)
915: {
916: PetscContainer container;
917: htool::HMatrix<PetscScalar> *A;
919: PetscFunctionBegin;
920: PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
921: if (container) {
922: PetscCall(PetscContainerGetPointer(container, &A));
923: delete A;
924: PetscCall(PetscObjectCompose((PetscObject)F, "HMatrix", nullptr));
925: }
926: PetscCall(PetscObjectComposeFunction((PetscObject)F, "MatFactorGetSolverType_C", nullptr));
927: PetscFunctionReturn(PETSC_SUCCESS);
928: }
930: static PetscErrorCode MatFactorGetSolverType_Htool(Mat, MatSolverType *type)
931: {
932: PetscFunctionBegin;
933: *type = MATSOLVERHTOOL;
934: PetscFunctionReturn(PETSC_SUCCESS);
935: }
937: template <char trans>
938: static inline PetscErrorCode MatSolve_Private(Mat A, htool::Matrix<PetscScalar> &X)
939: {
940: PetscContainer container;
941: htool::HMatrix<PetscScalar> *B;
943: PetscFunctionBegin;
944: PetscCheck(A->factortype == MAT_FACTOR_LU || A->factortype == MAT_FACTOR_CHOLESKY, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_UNKNOWN_TYPE, "Only MAT_LU_FACTOR and MAT_CHOLESKY_FACTOR are supported");
945: PetscCall(PetscObjectQuery((PetscObject)A, "HMatrix", (PetscObject *)&container));
946: PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call Mat%sFactorNumeric() before Mat%sSolve%s()", A->factortype == MAT_FACTOR_LU ? "LU" : "Cholesky", X.nb_cols() == 1 ? "" : "Mat", trans == 'N' ? "" : "Transpose");
947: PetscCall(PetscContainerGetPointer(container, &B));
948: if (A->factortype == MAT_FACTOR_LU) PetscCallExternalVoid("lu_solve", htool::lu_solve(trans, *B, X));
949: else PetscCallExternalVoid("cholesky_solve", htool::cholesky_solve('U', *B, X));
950: PetscFunctionReturn(PETSC_SUCCESS);
951: }
953: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
954: static PetscErrorCode MatSolve_Htool(Mat A, Type b, Type x)
955: {
956: PetscInt n;
957: htool::Matrix<PetscScalar> v;
958: PetscScalar *array;
960: PetscFunctionBegin;
961: PetscCall(VecGetLocalSize(b, &n));
962: PetscCall(VecCopy(b, x));
963: PetscCall(VecGetArrayWrite(x, &array));
964: v.assign(n, 1, array, false);
965: PetscCall(VecRestoreArrayWrite(x, &array));
966: PetscCall(MatSolve_Private<trans>(A, v));
967: PetscFunctionReturn(PETSC_SUCCESS);
968: }
970: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
971: static PetscErrorCode MatSolve_Htool(Mat A, Type B, Type X)
972: {
973: PetscInt m, N;
974: htool::Matrix<PetscScalar> v;
975: PetscScalar *array;
977: PetscFunctionBegin;
978: PetscCall(MatGetLocalSize(B, &m, nullptr));
979: PetscCall(MatGetLocalSize(B, nullptr, &N));
980: PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
981: PetscCall(MatDenseGetArrayWrite(X, &array));
982: v.assign(m, N, array, false);
983: PetscCall(MatDenseRestoreArrayWrite(X, &array));
984: PetscCall(MatSolve_Private<trans>(A, v));
985: PetscFunctionReturn(PETSC_SUCCESS);
986: }
988: template <MatFactorType ftype>
989: static PetscErrorCode MatFactorNumeric_Htool(Mat F, Mat A, const MatFactorInfo *)
990: {
991: Mat_Htool *a;
992: htool::HMatrix<PetscScalar> *B;
994: PetscFunctionBegin;
995: PetscCall(MatShellGetContext(A, &a));
996: B = new htool::HMatrix<PetscScalar>(*a->local_hmatrix);
997: if (ftype == MAT_FACTOR_LU) PetscCallExternalVoid("sequential_lu_factorization", htool::sequential_lu_factorization(*B));
998: else PetscCallExternalVoid("sequential_cholesky_factorization", htool::sequential_cholesky_factorization('U', *B));
999: PetscCall(PetscObjectContainerCompose((PetscObject)F, "HMatrix", B, nullptr));
1000: PetscFunctionReturn(PETSC_SUCCESS);
1001: }
1003: template <MatFactorType ftype>
1004: PetscErrorCode MatFactorSymbolic_Htool(Mat F, Mat)
1005: {
1006: PetscFunctionBegin;
1007: F->preallocated = PETSC_TRUE;
1008: F->assembled = PETSC_TRUE;
1009: F->ops->solve = MatSolve_Htool<'N', Vec>;
1010: F->ops->matsolve = MatSolve_Htool<'N', Mat>;
1011: if (!PetscDefined(USE_COMPLEX) || ftype == MAT_FACTOR_LU) {
1012: F->ops->solvetranspose = MatSolve_Htool<'T', Vec>;
1013: F->ops->matsolvetranspose = MatSolve_Htool<'T', Mat>;
1014: }
1015: F->ops->destroy = MatDestroy_Factor;
1016: if (ftype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_LU>;
1017: else F->ops->choleskyfactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_CHOLESKY>;
1018: PetscFunctionReturn(PETSC_SUCCESS);
1019: }
1021: static PetscErrorCode MatLUFactorSymbolic_Htool(Mat F, Mat A, IS, IS, const MatFactorInfo *)
1022: {
1023: PetscFunctionBegin;
1024: PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_LU>(F, A));
1025: PetscFunctionReturn(PETSC_SUCCESS);
1026: }
1028: static PetscErrorCode MatCholeskyFactorSymbolic_Htool(Mat F, Mat A, IS, const MatFactorInfo *)
1029: {
1030: PetscFunctionBegin;
1031: PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_CHOLESKY>(F, A));
1032: PetscFunctionReturn(PETSC_SUCCESS);
1033: }
1035: static PetscErrorCode MatGetFactor_htool_htool(Mat A, MatFactorType ftype, Mat *F)
1036: {
1037: Mat B;
1038: Mat_Htool *a;
1039: PetscMPIInt size;
1041: PetscFunctionBegin;
1042: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1043: PetscCall(MatShellGetContext(A, &a));
1044: PetscCheck(size == 1, PetscObjectComm((PetscObject)A), PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel MatGetFactor()");
1045: PetscCheck(a->block_tree_consistency, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot factor a MatHtool with inconsistent block tree");
1046: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1047: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1048: PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &((PetscObject)B)->type_name));
1049: PetscCall(MatSetUp(B));
1051: B->ops->getinfo = MatGetInfo_External;
1052: B->factortype = ftype;
1053: B->trivialsymbolic = PETSC_TRUE;
1055: if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_Htool;
1056: else if (ftype == MAT_FACTOR_CHOLESKY) B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_Htool;
1058: PetscCall(PetscFree(B->solvertype));
1059: PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &B->solvertype));
1061: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_Htool));
1062: *F = B;
1063: PetscFunctionReturn(PETSC_SUCCESS);
1064: }
1066: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Htool(void)
1067: {
1068: PetscFunctionBegin;
1069: PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_LU, MatGetFactor_htool_htool));
1070: PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_CHOLESKY, MatGetFactor_htool_htool));
1071: PetscFunctionReturn(PETSC_SUCCESS);
1072: }
1074: static PetscErrorCode MatHtoolCreateFromKernel_Htool(Mat A, PetscInt spacedim, const PetscReal coords_target[], const PetscReal coords_source[], MatHtoolKernelFn *kernel, void *kernelctx)
1075: {
1076: Mat_Htool *a;
1078: PetscFunctionBegin;
1079: PetscCall(MatShellGetContext(A, &a));
1080: a->dim = spacedim;
1081: a->kernel = kernel;
1082: a->kernelctx = kernelctx;
1083: PetscCall(PetscCalloc1(A->rmap->N * spacedim, &a->gcoords_target));
1084: PetscCall(PetscArraycpy(a->gcoords_target + A->rmap->rstart * spacedim, coords_target, A->rmap->n * spacedim));
1085: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_target, A->rmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1086: if (coords_target != coords_source) {
1087: PetscCall(PetscCalloc1(A->cmap->N * spacedim, &a->gcoords_source));
1088: PetscCall(PetscArraycpy(a->gcoords_source + A->cmap->rstart * spacedim, coords_source, A->cmap->n * spacedim));
1089: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_source, A->cmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1090: } else a->gcoords_source = a->gcoords_target;
1091: PetscFunctionReturn(PETSC_SUCCESS);
1092: }
1094: /*MC
1095: MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
1097: Use `./configure --download-htool` to install PETSc to use Htool.
1099: Options Database Key:
1100: . -mat_type htool - matrix type to `MATHTOOL`
1102: Level: beginner
1104: .seealso: [](ch_matrices), `Mat`, `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
1105: M*/
1106: PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
1107: {
1108: Mat_Htool *a;
1110: PetscFunctionBegin;
1111: PetscCall(MatSetType(A, MATSHELL));
1112: PetscCall(PetscNew(&a));
1113: PetscCall(MatShellSetContext(A, a));
1114: PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL, (PetscErrorCodeFn *)MatGetDiagonal_Htool));
1115: PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL_BLOCK, (PetscErrorCodeFn *)MatGetDiagonalBlock_Htool));
1116: PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Htool));
1117: PetscCall(MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1118: if (!PetscDefined(USE_COMPLEX)) PetscCall(MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1119: A->ops->increaseoverlap = MatIncreaseOverlap_Htool;
1120: A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
1121: PetscCall(MatShellSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_Htool));
1122: PetscCall(MatShellSetOperation(A, MATOP_SET_FROM_OPTIONS, (PetscErrorCodeFn *)MatSetFromOptions_Htool));
1123: PetscCall(MatShellSetOperation(A, MATOP_GET_ROW, (PetscErrorCodeFn *)MatGetRow_Htool));
1124: PetscCall(MatShellSetOperation(A, MATOP_RESTORE_ROW, (PetscErrorCodeFn *)MatRestoreRow_Htool));
1125: PetscCall(MatShellSetOperation(A, MATOP_ASSEMBLY_END, (PetscErrorCodeFn *)MatAssemblyEnd_Htool));
1126: PetscCall(MatShellSetOperation(A, MATOP_TRANSPOSE, (PetscErrorCodeFn *)MatTranspose_Htool));
1127: PetscCall(MatShellSetOperation(A, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Htool));
1128: a->dim = 0;
1129: a->gcoords_target = nullptr;
1130: a->gcoords_source = nullptr;
1131: a->max_cluster_leaf_size = 10;
1132: a->epsilon = PetscSqrtReal(PETSC_SMALL);
1133: a->eta = 10.0;
1134: a->depth[0] = 0;
1135: a->depth[1] = 0;
1136: a->block_tree_consistency = PETSC_TRUE;
1137: a->permutation = PETSC_TRUE;
1138: a->recompression = PETSC_FALSE;
1139: a->compressor = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
1140: a->distributed_operator = nullptr;
1141: a->block_diagonal_hmatrix = nullptr;
1142: a->local_hmatrix = nullptr;
1143: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", MatProductSetFromOptions_Htool));
1144: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", MatProductSetFromOptions_Htool));
1145: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", MatConvert_Htool_Dense));
1146: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", MatConvert_Htool_Dense));
1147: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", MatHtoolGetHierarchicalMat_Htool));
1148: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", MatHtoolSetKernel_Htool));
1149: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", MatHtoolGetPermutationSource_Htool));
1150: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", MatHtoolGetPermutationTarget_Htool));
1151: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", MatHtoolUsePermutation_Htool));
1152: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", MatHtoolUseRecompression_Htool));
1153: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", MatHtoolGetEpsilon_Htool));
1154: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", MatHtoolSetEpsilon_Htool));
1155: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", MatHtoolGetEta_Htool));
1156: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", MatHtoolSetEta_Htool));
1157: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", MatHtoolGetMaxClusterLeafSize_Htool));
1158: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", MatHtoolSetMaxClusterLeafSize_Htool));
1159: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", MatHtoolGetMinTargetDepth_Htool));
1160: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", MatHtoolSetMinTargetDepth_Htool));
1161: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", MatHtoolGetMinSourceDepth_Htool));
1162: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", MatHtoolSetMinSourceDepth_Htool));
1163: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", MatHtoolGetBlockTreeConsistency_Htool));
1164: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", MatHtoolSetBlockTreeConsistency_Htool));
1165: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", MatHtoolGetCompressorType_Htool));
1166: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", MatHtoolSetCompressorType_Htool));
1167: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", MatHtoolGetClusteringType_Htool));
1168: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", MatHtoolSetClusteringType_Htool));
1169: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", MatHtoolCreateFromKernel_Htool));
1170: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", MatShellSetContext_Immutable));
1171: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContextDestroy_C", MatShellSetContextDestroy_Immutable));
1172: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetManageScalingShifts_C", MatShellSetManageScalingShifts_Immutable));
1173: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATHTOOL));
1174: PetscFunctionReturn(PETSC_SUCCESS);
1175: }