Actual source code: ihtool.cxx
1: #include <../src/mat/impls/htool/htool.hpp>
2: #include <petscdraw.h>
3: #include <set>
5: const char *const MatHtoolCompressorTypes[] = {"sympartialACA", "fullACA", "SVD"};
6: const char *const MatHtoolClusteringTypes[] = {"PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric"};
7: const char *HtoolCitations[2] = {"@article{marchand2020two,\n"
8: " Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
9: " Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
10: " Year = {2020},\n"
11: " Publisher = {Elsevier},\n"
12: " Journal = {Numerische Mathematik},\n"
13: " Volume = {146},\n"
14: " Pages = {597--628},\n"
15: " Url = {https://github.com/htool-ddm/htool}\n"
16: "}\n",
17: "@article{Marchand2026,\n"
18: " Author = {Marchand, Pierre and Tournier, Pierre-Henri and Jolivet, Pierre},\n"
19: " Title = {{Htool-DDM}: A {C++} library for parallel solvers and compressed linear systems},\n"
20: " Year = {2026},\n"
21: " Publisher = {The Open Journal},\n"
22: " Journal = {Journal of Open Source Software},\n"
23: " Volume = {11},\n"
24: " Number = {118},\n"
25: " Pages = {9279},\n"
26: " Url = {https://doi.org/10.21105/joss.09279}\n"
27: "}\n"};
28: static PetscBool HtoolCite[2] = {PETSC_FALSE, PETSC_FALSE};
30: static PetscErrorCode MatGetDiagonal_Htool(Mat A, Vec v)
31: {
32: Mat_Htool *a;
33: PetscScalar *x;
34: PetscBool flg;
36: PetscFunctionBegin;
37: PetscCall(MatHasCongruentLayouts(A, &flg));
38: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
39: PetscCall(MatShellGetContext(A, &a));
40: PetscCheck(a->block_diagonal_hmatrix, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Block diagonal htool::HMatrix not found");
41: PetscCall(VecGetArrayWrite(v, &x));
42: PetscCallExternalVoid("copy_diagonal_in_user_numbering", htool::copy_diagonal_in_user_numbering(*a->block_diagonal_hmatrix, x));
43: PetscCall(VecRestoreArrayWrite(v, &x));
44: PetscFunctionReturn(PETSC_SUCCESS);
45: }
47: static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A, Mat *b)
48: {
49: Mat_Htool *a, *c;
50: Mat B;
51: PetscScalar shift[2], scale[2];
52: PetscBool flg;
54: PetscFunctionBegin;
55: PetscCall(MatHasCongruentLayouts(A, &flg));
56: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
57: PetscCall(MatShellGetContext(A, &a));
58: PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
59: PetscCall(MatShellGetScalingShifts(A, shift, scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
60: if (B) {
61: PetscCall(MatShellGetScalingShifts(B, shift + 1, scale + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
62: /* invalidate cache when scale or shift changed; PetscObjectCompose() below releases the old entry */
63: if (scale[0] != scale[1] || shift[0] != shift[1]) B = nullptr;
64: }
65: if (!B) {
66: PetscCheck(a->block_diagonal_hmatrix, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Block diagonal htool::HMatrix not found");
67: PetscCall(MatCreate(PETSC_COMM_SELF, &B));
68: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
69: PetscCall(MatSetType(B, MATHTOOL));
70: PetscCall(MatSetUp(B));
71: B->assembled = PETSC_TRUE;
72: B->preallocated = PETSC_TRUE;
73: PetscCall(MatShellGetContext(B, &c));
74: c->dim = a->dim;
75: c->epsilon = a->epsilon;
76: c->eta = a->eta;
77: c->block_tree_consistency = a->block_tree_consistency;
78: c->permutation = a->permutation;
79: c->recompression = a->recompression;
80: c->compressor = a->compressor;
81: c->clustering = a->clustering;
82: c->local_to_local_operator = std::make_unique<htool::LocalToLocalHMatrix<PetscScalar>>(*a->block_diagonal_hmatrix);
83: c->distributed_operator_holder_wo_assembly = std::make_unique<htool::CustomApproximationBuilder<PetscScalar>>(a->block_diagonal_hmatrix->get_target_cluster(), a->block_diagonal_hmatrix->get_source_cluster(), PetscObjectComm((PetscObject)A), *c->local_to_local_operator);
84: c->distributed_operator = &c->distributed_operator_holder_wo_assembly->distributed_operator;
85: c->block_diagonal_hmatrix = a->block_diagonal_hmatrix;
86: c->local_hmatrix = a->block_diagonal_hmatrix;
87: PetscCall(MatPropagateSymmetryOptions(A, B));
88: PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
89: *b = B;
90: PetscCall(MatDestroy(&B));
91: PetscCall(MatScale(*b, *scale));
92: PetscCall(MatShift(*b, *shift));
93: } else *b = B;
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: static PetscErrorCode MatMult_Htool(Mat A, Vec x, Vec y)
98: {
99: Mat_Htool *a;
100: const PetscScalar *in;
101: PetscScalar *out;
103: PetscFunctionBegin;
104: PetscCall(MatShellGetContext(A, &a));
105: PetscCall(VecGetArrayRead(x, &in));
106: PetscCall(VecGetArrayWrite(y, &out));
107: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
108: else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
109: PetscCall(VecRestoreArrayRead(x, &in));
110: PetscCall(VecRestoreArrayWrite(y, &out));
111: PetscFunctionReturn(PETSC_SUCCESS);
112: }
114: static PetscErrorCode MatMultTranspose_Htool(Mat A, Vec x, Vec y)
115: {
116: Mat_Htool *a;
117: const PetscScalar *in;
118: PetscScalar *out;
120: PetscFunctionBegin;
121: PetscCall(MatShellGetContext(A, &a));
122: PetscCall(VecGetArrayRead(x, &in));
123: PetscCall(VecGetArrayWrite(y, &out));
124: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
125: else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
126: PetscCall(VecRestoreArrayRead(x, &in));
127: PetscCall(VecRestoreArrayWrite(y, &out));
128: PetscFunctionReturn(PETSC_SUCCESS);
129: }
131: static PetscErrorCode MatIncreaseOverlap_Htool(Mat A, PetscInt is_max, IS is[], PetscInt ov)
132: {
133: std::set<PetscInt> set;
134: const PetscInt *idx;
135: PetscInt *oidx, size, bs[2];
136: PetscMPIInt csize;
138: PetscFunctionBegin;
139: PetscCall(MatGetBlockSizes(A, bs, bs + 1));
140: if (bs[0] != bs[1]) bs[0] = 1;
141: for (PetscInt i = 0; i < is_max; ++i) {
142: /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
143: /* needed to avoid subdomain matrices to replicate A since it is dense */
144: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]), &csize));
145: PetscCheck(csize == 1, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel IS");
146: PetscCall(ISGetSize(is[i], &size));
147: PetscCall(ISGetIndices(is[i], &idx));
148: for (PetscInt j = 0; j < size; ++j) {
149: set.insert(idx[j]);
150: for (PetscInt k = 1; k <= ov; ++k) { /* for each layer of overlap */
151: if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
152: if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
153: }
154: }
155: PetscCall(ISRestoreIndices(is[i], &idx));
156: PetscCall(ISDestroy(is + i));
157: if (bs[0] > 1) {
158: for (std::set<PetscInt>::iterator it = set.cbegin(); it != set.cend(); it++) {
159: std::vector<PetscInt> block(bs[0]);
160: std::iota(block.begin(), block.end(), (*it / bs[0]) * bs[0]);
161: set.insert(block.cbegin(), block.cend());
162: }
163: }
164: size = set.size(); /* size with overlap */
165: PetscCall(PetscMalloc1(size, &oidx));
166: for (const PetscInt j : set) *oidx++ = j;
167: oidx -= size;
168: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, oidx, PETSC_OWN_POINTER, is + i));
169: }
170: PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: static PetscErrorCode MatCreateSubMatrices_Htool(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
174: {
175: Mat_Htool *a;
176: Mat D, B, BT;
177: const PetscScalar *copy;
178: PetscScalar *ptr, shift, scale;
179: const PetscInt *idxr, *idxc, *it;
180: PetscInt nrow, m, i;
181: PetscBool flg;
183: PetscFunctionBegin;
184: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
185: PetscCall(MatShellGetContext(A, &a));
186: if (scall != MAT_REUSE_MATRIX) PetscCall(PetscCalloc1(n, submat));
187: for (i = 0; i < n; ++i) {
188: PetscCall(ISGetLocalSize(irow[i], &nrow));
189: PetscCall(ISGetLocalSize(icol[i], &m));
190: PetscCall(ISGetIndices(irow[i], &idxr));
191: PetscCall(ISGetIndices(icol[i], &idxc));
192: if (scall != MAT_REUSE_MATRIX) PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow, m, nrow, m, nullptr, (*submat) + i));
193: PetscCall(MatDenseGetArrayWrite((*submat)[i], &ptr));
194: if (irow[i] == icol[i]) { /* same row and column IS? */
195: PetscCall(MatHasCongruentLayouts(A, &flg));
196: if (flg) {
197: PetscCall(ISSorted(irow[i], &flg));
198: if (flg) { /* sorted IS? */
199: it = std::lower_bound(idxr, idxr + nrow, A->rmap->rstart);
200: if (it != idxr + nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
201: if (std::distance(idxr, it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
202: for (PetscInt j = 0; j < A->rmap->n && flg; ++j)
203: if (PetscUnlikely(it[j] != A->rmap->rstart + j)) flg = PETSC_FALSE;
204: if (flg) { /* complete local diagonal block in IS? */
205: Mat dense;
207: /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
208: * [ B C E ]
209: * A = [ B D E ]
210: * [ B F E ]
211: */
212: m = std::distance(idxr, it); /* shift of the coefficient (0,0) of block D from above */
213: PetscCall(MatGetDiagonalBlock(A, &D));
214: PetscCall(MatConvert(D, MATDENSE, MAT_INITIAL_MATRIX, &dense));
215: PetscCall(MatDenseGetArrayRead(dense, ©));
216: for (PetscInt k = 0; k < A->rmap->n; ++k) PetscCall(PetscArraycpy(ptr + (m + k) * nrow + m, copy + k * A->rmap->n, A->rmap->n)); /* block D from above */
217: PetscCall(MatDenseRestoreArrayRead(dense, ©));
218: PetscCall(MatDestroy(&dense));
219: if (m) {
220: a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* vertical block B from above */
221: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
222: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
223: PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, m, A->rmap->n, m, ptr + m, &B));
224: PetscCall(MatDenseSetLDA(B, nrow));
225: PetscCall(MatCreateDense(PETSC_COMM_SELF, m, A->rmap->n, m, A->rmap->n, ptr + m * nrow, &BT));
226: PetscCall(MatDenseSetLDA(BT, nrow));
227: if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
228: PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
229: } else {
230: PetscCall(MatTransposeSetPrecursor(B, BT));
231: PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
232: }
233: PetscCall(MatDestroy(&B));
234: PetscCall(MatDestroy(&BT));
235: } else {
236: for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block C from above */
237: a->wrapper->copy_submatrix(m, 1, idxr, idxc + m + k, ptr + (m + k) * nrow);
238: }
239: }
240: }
241: if (m + A->rmap->n != nrow) {
242: a->wrapper->copy_submatrix(nrow, std::distance(it + A->rmap->n, idxr + nrow), idxr, idxc + m + A->rmap->n, ptr + (m + A->rmap->n) * nrow); /* vertical block E from above */
243: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
244: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
245: PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), ptr + (m + A->rmap->n) * nrow + m, &B));
246: PetscCall(MatDenseSetLDA(B, nrow));
247: PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, ptr + m * nrow + m + A->rmap->n, &BT));
248: PetscCall(MatDenseSetLDA(BT, nrow));
249: if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
250: PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
251: } else {
252: PetscCall(MatTransposeSetPrecursor(B, BT));
253: PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
254: }
255: PetscCall(MatDestroy(&B));
256: PetscCall(MatDestroy(&BT));
257: } else {
258: for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block F from above */
259: a->wrapper->copy_submatrix(std::distance(it + A->rmap->n, idxr + nrow), 1, it + A->rmap->n, idxc + m + k, ptr + (m + k) * nrow + m + A->rmap->n);
260: }
261: }
262: }
263: } /* complete local diagonal block not in IS */
264: } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
265: } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
266: } /* unsorted IS */
267: }
268: } else flg = PETSC_FALSE; /* different row and column IS */
269: if (!flg) a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* reassemble everything */
270: PetscCall(ISRestoreIndices(irow[i], &idxr));
271: PetscCall(ISRestoreIndices(icol[i], &idxc));
272: PetscCall(MatDenseRestoreArrayWrite((*submat)[i], &ptr));
273: PetscCall(MatScale((*submat)[i], scale));
274: PetscCall(MatShift((*submat)[i], shift));
275: }
276: PetscFunctionReturn(PETSC_SUCCESS);
277: }
279: static PetscErrorCode MatDestroy_Htool(Mat A)
280: {
281: Mat_Htool *a;
282: PetscContainer container;
283: MatHtoolKernelTranspose *kernelt;
285: PetscFunctionBegin;
286: PetscCall(MatShellGetContext(A, &a));
287: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", nullptr));
288: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", nullptr));
289: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", nullptr));
290: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", nullptr));
291: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", nullptr));
292: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", nullptr));
293: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", nullptr));
294: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", nullptr));
295: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", nullptr));
296: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", nullptr));
297: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", nullptr));
298: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", nullptr));
299: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", nullptr));
300: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", nullptr));
301: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", nullptr));
302: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", nullptr));
303: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", nullptr));
304: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", nullptr));
305: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", nullptr));
306: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", nullptr));
307: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", nullptr));
308: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", nullptr));
309: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", nullptr));
310: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", nullptr));
311: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", nullptr));
312: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", nullptr));
313: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", nullptr));
314: PetscCall(PetscObjectQuery((PetscObject)A, "KernelTranspose", (PetscObject *)&container));
315: if (container) { /* created in MatTranspose_Htool() */
316: PetscCall(PetscContainerGetPointer(container, &kernelt));
317: PetscCall(MatDestroy(&kernelt->A));
318: PetscCall(PetscObjectCompose((PetscObject)A, "KernelTranspose", nullptr));
319: }
320: if (a->gcoords_source != a->gcoords_target) PetscCall(PetscFree(a->gcoords_source));
321: PetscCall(PetscFree(a->gcoords_target));
322: delete a->wrapper;
323: a->target_cluster.reset();
324: a->source_cluster.reset();
325: a->distributed_operator_holder_w_assembly.reset();
326: a->local_to_local_operator.reset();
327: a->distributed_operator_holder_wo_assembly.reset();
328: PetscCall(PetscFree(a));
329: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", nullptr)); // needed to avoid a call to MatShellSetContext_Immutable()
330: PetscFunctionReturn(PETSC_SUCCESS);
331: }
333: static PetscErrorCode MatView_Htool_Draw_Zoom(PetscDraw draw, void *ptr)
334: {
335: Mat A = (Mat)ptr;
336: Mat_Htool *a;
337: PetscReal x_r, y_r, x_l, y_l, w, h;
338: PetscInt min_max[2] = {PETSC_INT_MAX, 0};
339: const int greens[] = {PETSC_DRAW_LIMEGREEN, PETSC_DRAW_FORESTGREEN, PETSC_DRAW_DARKGREEN};
340: int color;
341: char str[16];
342: std::vector<const htool::HMatrix<PetscScalar, PetscReal> *> dense_blocks, low_rank_blocks;
344: PetscFunctionBegin;
345: PetscCall(MatShellGetContext(A, &a));
346: PetscCallExternalVoid("get_leaves", htool::get_leaves(*a->local_hmatrix, dense_blocks, low_rank_blocks));
347: for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
348: const PetscInt rank = block->get_rank();
350: if (rank < min_max[0]) min_max[0] = rank;
351: if (rank > min_max[1]) min_max[1] = rank;
352: }
353: PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)A), min_max, min_max));
354: if (min_max[0] == PETSC_INT_MAX) min_max[0] = min_max[1];
355: PetscCall(PetscDrawStringGetSize(draw, &w, &h));
356: PetscDrawCollectiveBegin(draw);
357: for (const htool::HMatrix<PetscScalar, PetscReal> *block : dense_blocks) {
358: x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
359: x_r += (PetscReal)block->get_source_cluster().get_size();
360: y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
361: y_l -= (PetscReal)block->get_target_cluster().get_size();
362: PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED));
363: PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
364: PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
365: PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
366: PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
367: }
368: for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
369: PetscReal th;
370: const PetscInt rank = block->get_rank();
372: x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
373: x_r += (PetscReal)block->get_source_cluster().get_size();
374: y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
375: y_l -= (PetscReal)block->get_target_cluster().get_size();
376: if (min_max[1] > min_max[0]) color = greens[(int)((PetscReal)(rank - min_max[0]) / (PetscReal)(min_max[1] - min_max[0]) * (PETSC_STATIC_ARRAY_LENGTH(greens) - 1) + 0.5)];
377: else color = greens[PETSC_STATIC_ARRAY_LENGTH(greens) - 1];
378: PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
379: PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
380: PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
381: PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
382: PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
383: PetscCall(PetscSNPrintf(str, sizeof(str), "%d", rank));
384: PetscCall(PetscDrawStringGetSize(draw, nullptr, &th));
385: if (x_r - x_l > 4 * w && y_r - y_l > 4 * h) PetscCall(PetscDrawStringCentered(draw, 0.5 * (x_l + x_r), 0.5 * (y_l + y_r) - th / 2, PETSC_DRAW_BLACK, str));
386: }
387: PetscDrawCollectiveEnd(draw);
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: static PetscErrorCode MatView_Htool_Draw(Mat A, PetscViewer viewer)
392: {
393: PetscDraw draw;
394: PetscReal x_r = (PetscReal)A->cmap->N, y_r = (PetscReal)A->rmap->N, w, h;
395: PetscBool flg;
397: PetscFunctionBegin;
398: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
399: PetscCall(PetscDrawIsNull(draw, &flg));
400: if (flg) PetscFunctionReturn(PETSC_SUCCESS);
401: w = x_r / 10.0;
402: h = y_r / 10.0;
403: PetscCall(PetscDrawSetCoordinates(draw, -w, -h, x_r + w, y_r + h));
404: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
405: PetscCall(PetscDrawZoom(draw, MatView_Htool_Draw_Zoom, A));
406: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", nullptr));
407: PetscCall(PetscDrawSave(draw));
408: PetscFunctionReturn(PETSC_SUCCESS);
409: }
411: static PetscErrorCode MatView_Htool(Mat A, PetscViewer pv)
412: {
413: Mat_Htool *a;
414: PetscScalar shift, scale;
415: PetscBool flg;
416: std::map<std::string, std::string> hmatrix_information;
418: PetscFunctionBegin;
419: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERDRAW, &flg));
420: if (flg) PetscCall(MatView_Htool_Draw(A, pv));
421: else {
422: PetscCall(MatShellGetContext(A, &a));
423: hmatrix_information = htool::get_distributed_hmatrix_information(*a->local_hmatrix, PetscObjectComm((PetscObject)A));
424: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &flg));
425: if (flg) {
426: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
427: PetscCall(PetscViewerASCIIPrintf(pv, "symmetry: %c\n", a->block_diagonal_hmatrix ? a->block_diagonal_hmatrix->get_symmetry() : 'N'));
428: if (PetscAbsScalar(scale - 1.0) > PETSC_MACHINE_EPSILON) {
429: #if defined(PETSC_USE_COMPLEX)
430: PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g+%gi\n", (double)PetscRealPart(scale), (double)PetscImaginaryPart(scale)));
431: #else
432: PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g\n", (double)scale));
433: #endif
434: }
435: if (PetscAbsScalar(shift) > PETSC_MACHINE_EPSILON) {
436: #if defined(PETSC_USE_COMPLEX)
437: PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g+%gi\n", (double)PetscRealPart(shift), (double)PetscImaginaryPart(shift)));
438: #else
439: PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g\n", (double)shift));
440: #endif
441: }
442: PetscCall(PetscViewerASCIIPrintf(pv, "maximal cluster leaf size: %" PetscInt_FMT "\n", a->max_cluster_leaf_size));
443: PetscCall(PetscViewerASCIIPrintf(pv, "epsilon: %g\n", (double)a->epsilon));
444: PetscCall(PetscViewerASCIIPrintf(pv, "eta: %g\n", (double)a->eta));
445: PetscCall(PetscViewerASCIIPrintf(pv, "minimum target depth: %" PetscInt_FMT "\n", a->depth[0]));
446: PetscCall(PetscViewerASCIIPrintf(pv, "minimum source depth: %" PetscInt_FMT "\n", a->depth[1]));
447: PetscCall(PetscViewerASCIIPrintf(pv, "compressor: %s\n", MatHtoolCompressorTypes[a->compressor]));
448: PetscCall(PetscViewerASCIIPrintf(pv, "clustering: %s\n", MatHtoolClusteringTypes[a->clustering]));
449: PetscCall(PetscViewerASCIIPrintf(pv, "compression ratio: %s\n", hmatrix_information["Compression_ratio"].c_str()));
450: PetscCall(PetscViewerASCIIPrintf(pv, "space saving: %s\n", hmatrix_information["Space_saving"].c_str()));
451: PetscCall(PetscViewerASCIIPrintf(pv, "block tree consistency: %s\n", PetscBools[a->local_hmatrix->is_block_tree_consistent()]));
452: PetscCall(PetscViewerASCIIPrintf(pv, "recompression: %s\n", PetscBools[a->recompression]));
453: PetscCall(PetscViewerASCIIPrintf(pv, "number of dense (resp. low rank) matrices: %s (resp. %s)\n", hmatrix_information["Number_of_dense_blocks"].c_str(), hmatrix_information["Number_of_low_rank_blocks"].c_str()));
454: PetscCall(
455: PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n", hmatrix_information["Dense_block_size_min"].c_str(), hmatrix_information["Dense_block_size_mean"].c_str(), hmatrix_information["Dense_block_size_max"].c_str()));
456: PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n", hmatrix_information["Low_rank_block_size_min"].c_str(), hmatrix_information["Low_rank_block_size_mean"].c_str(),
457: hmatrix_information["Low_rank_block_size_max"].c_str()));
458: PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) ranks: (%s, %s, %s)\n", hmatrix_information["Rank_min"].c_str(), hmatrix_information["Rank_mean"].c_str(), hmatrix_information["Rank_max"].c_str()));
459: }
460: }
461: PetscFunctionReturn(PETSC_SUCCESS);
462: }
464: /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
465: static PetscErrorCode MatGetRow_Htool(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
466: {
467: Mat_Htool *a;
468: PetscScalar shift, scale;
469: PetscInt *idxc;
470: PetscBLASInt one = 1, bn;
472: PetscFunctionBegin;
473: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
474: PetscCall(MatShellGetContext(A, &a));
475: if (nz) *nz = A->cmap->N;
476: if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
477: PetscCall(PetscMalloc1(A->cmap->N, &idxc));
478: for (PetscInt i = 0; i < A->cmap->N; ++i) idxc[i] = i;
479: }
480: if (idx) *idx = idxc;
481: if (v) {
482: PetscCall(PetscMalloc1(A->cmap->N, v));
483: if (a->wrapper) a->wrapper->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
484: else reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx)->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
485: PetscCall(PetscBLASIntCast(A->cmap->N, &bn));
486: PetscCallCXX(htool::Blas<PetscScalar>::scal(&bn, &scale, *v, &one));
487: if (row < A->cmap->N) (*v)[row] += shift;
488: }
489: if (!idx) PetscCall(PetscFree(idxc));
490: PetscFunctionReturn(PETSC_SUCCESS);
491: }
493: static PetscErrorCode MatRestoreRow_Htool(Mat, PetscInt, PetscInt *, PetscInt **idx, PetscScalar **v)
494: {
495: PetscFunctionBegin;
496: if (idx) PetscCall(PetscFree(*idx));
497: if (v) PetscCall(PetscFree(*v));
498: PetscFunctionReturn(PETSC_SUCCESS);
499: }
501: static PetscErrorCode MatSetFromOptions_Htool(Mat A, PetscOptionItems PetscOptionsObject)
502: {
503: Mat_Htool *a;
504: PetscInt n;
505: PetscBool flg;
507: PetscFunctionBegin;
508: PetscCall(MatShellGetContext(A, &a));
509: PetscOptionsHeadBegin(PetscOptionsObject, "Htool options");
510: PetscCall(PetscOptionsBoundedInt("-mat_htool_max_cluster_leaf_size", "Maximal leaf size in cluster tree", nullptr, a->max_cluster_leaf_size, &a->max_cluster_leaf_size, nullptr, 0));
511: PetscCall(PetscOptionsBoundedReal("-mat_htool_epsilon", "Relative error in Frobenius norm when approximating a block", nullptr, a->epsilon, &a->epsilon, nullptr, 0.0));
512: PetscCall(PetscOptionsReal("-mat_htool_eta", "Admissibility condition tolerance", nullptr, a->eta, &a->eta, nullptr));
513: PetscCall(PetscOptionsBoundedInt("-mat_htool_min_target_depth", "Minimal cluster tree depth associated with the rows", nullptr, a->depth[0], a->depth, nullptr, 0));
514: PetscCall(PetscOptionsBoundedInt("-mat_htool_min_source_depth", "Minimal cluster tree depth associated with the columns", nullptr, a->depth[1], a->depth + 1, nullptr, 0));
515: PetscCall(PetscOptionsBool("-mat_htool_block_tree_consistency", "Block tree consistency", nullptr, a->block_tree_consistency, &a->block_tree_consistency, nullptr));
516: PetscCall(PetscOptionsBool("-mat_htool_recompression", "Use recompression", nullptr, a->recompression, &a->recompression, nullptr));
518: n = 0;
519: PetscCall(PetscOptionsEList("-mat_htool_compressor", "Type of compression", "MatHtoolCompressorType", MatHtoolCompressorTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes), MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA], &n, &flg));
520: if (flg) a->compressor = MatHtoolCompressorType(n);
521: n = 0;
522: PetscCall(PetscOptionsEList("-mat_htool_clustering", "Type of clustering", "MatHtoolClusteringType", MatHtoolClusteringTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes), MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR], &n, &flg));
523: if (flg) a->clustering = MatHtoolClusteringType(n);
524: PetscOptionsHeadEnd();
525: PetscFunctionReturn(PETSC_SUCCESS);
526: }
528: static PetscErrorCode MatAssemblyEnd_Htool(Mat A, MatAssemblyType)
529: {
530: Mat_Htool *a;
531: const PetscInt *ranges;
532: PetscInt *offset;
533: PetscMPIInt size, rank;
534: char S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'), uplo = S == 'N' ? 'N' : 'U';
535: htool::VirtualGenerator<PetscScalar> *generator = nullptr;
536: htool::ClusterTreeBuilder<PetscReal> recursive_build_strategy;
537: htool::Cluster<PetscReal> *source_cluster;
538: std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> compressor;
540: PetscFunctionBegin;
541: for (size_t i = 0; i < PETSC_STATIC_ARRAY_LENGTH(HtoolCite); ++i) PetscCall(PetscCitationsRegister(HtoolCitations[i], HtoolCite + i));
542: PetscCall(MatShellGetContext(A, &a));
543: if (a->distributed_operator_holder_wo_assembly) PetscFunctionReturn(PETSC_SUCCESS);
544: delete a->wrapper;
545: a->target_cluster.reset();
546: a->source_cluster.reset();
547: a->distributed_operator_holder_w_assembly.reset();
548: a->local_to_local_operator.reset();
549: a->distributed_operator_holder_wo_assembly.reset();
550: // clustering
551: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
552: PetscCall(PetscMalloc1(2 * size, &offset));
553: PetscCall(MatGetOwnershipRanges(A, &ranges));
554: for (PetscInt i = 0; i < size; ++i) {
555: offset[2 * i] = ranges[i];
556: offset[2 * i + 1] = ranges[i + 1] - ranges[i];
557: }
558: switch (a->clustering) {
559: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
560: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
561: break;
562: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
563: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
564: break;
565: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
566: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
567: break;
568: default:
569: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
570: }
571: recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
572: a->target_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->rmap->N, a->dim, a->gcoords_target, 2, size, offset));
573: if (a->gcoords_target != a->gcoords_source) {
574: PetscCall(MatGetOwnershipRangesColumn(A, &ranges));
575: for (PetscInt i = 0; i < size; ++i) {
576: offset[2 * i] = ranges[i];
577: offset[2 * i + 1] = ranges[i + 1] - ranges[i];
578: }
579: switch (a->clustering) {
580: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
581: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
582: break;
583: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
584: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
585: break;
586: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
587: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
588: break;
589: default:
590: recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
591: }
592: recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
593: a->source_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->cmap->N, a->dim, a->gcoords_source, 2, size, offset));
594: S = uplo = 'N';
595: source_cluster = a->source_cluster.get();
596: } else source_cluster = a->target_cluster.get();
597: PetscCall(PetscFree(offset));
598: // generator
599: if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
600: else {
601: a->wrapper = nullptr;
602: generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx);
603: }
604: // compressor
605: switch (a->compressor) {
606: case MAT_HTOOL_COMPRESSOR_FULL_ACA:
607: compressor = std::make_shared<htool::fullACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
608: break;
609: case MAT_HTOOL_COMPRESSOR_SVD:
610: compressor = std::make_shared<htool::SVD<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
611: break;
612: default:
613: compressor = std::make_shared<htool::sympartialACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
614: }
615: // local hierarchical matrix
616: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
617: auto hmatrix_builder = htool::HMatrixTreeBuilder<PetscScalar>(a->epsilon, a->eta, S, uplo);
618: if (a->recompression) {
619: std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> RecompressedLowRankGenerator = std::make_shared<htool::RecompressedLowRankGenerator<PetscScalar>>(*compressor, std::function<void(htool::LowRankMatrix<PetscScalar> &)>(htool::SVD_recompression<PetscScalar>));
620: hmatrix_builder.set_low_rank_generator(RecompressedLowRankGenerator);
621: } else {
622: hmatrix_builder.set_low_rank_generator(compressor);
623: }
624: hmatrix_builder.set_minimal_target_depth(a->depth[0]);
625: hmatrix_builder.set_minimal_source_depth(a->depth[1]);
626: PetscCheck(a->block_tree_consistency || (!a->block_tree_consistency && !(A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE)), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot have a MatHtool with inconsistent block tree which is either symmetric or Hermitian");
627: hmatrix_builder.set_block_tree_consistency(a->block_tree_consistency);
628: a->distributed_operator_holder_w_assembly = std::make_unique<htool::DefaultApproximationBuilder<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, *a->target_cluster, *source_cluster, hmatrix_builder, PetscObjectComm((PetscObject)A));
629: a->distributed_operator = &a->distributed_operator_holder_w_assembly->distributed_operator;
630: a->block_diagonal_hmatrix = a->distributed_operator_holder_w_assembly->block_diagonal_hmatrix;
631: a->local_hmatrix = &a->distributed_operator_holder_w_assembly->hmatrix;
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
635: static PetscErrorCode MatProductNumeric_Htool(Mat C)
636: {
637: Mat_Product *product = C->product;
638: Mat_Htool *a;
639: const PetscScalar *in;
640: PetscScalar *out;
641: PetscInt N, lda;
643: PetscFunctionBegin;
644: MatCheckProduct(C, 1);
645: PetscCall(MatGetSize(C, nullptr, &N));
646: PetscCall(MatDenseGetLDA(C, &lda));
647: PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
648: PetscCall(MatDenseGetArrayRead(product->B, &in));
649: PetscCall(MatDenseGetArrayWrite(C, &out));
650: PetscCall(MatShellGetContext(product->A, &a));
651: switch (product->type) {
652: case MATPRODUCT_AB:
653: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
654: else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
655: break;
656: case MATPRODUCT_AtB:
657: if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
658: else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
659: break;
660: default:
661: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductType %s is not supported", MatProductTypes[product->type]);
662: }
663: PetscCall(MatDenseRestoreArrayWrite(C, &out));
664: PetscCall(MatDenseRestoreArrayRead(product->B, &in));
665: PetscFunctionReturn(PETSC_SUCCESS);
666: }
668: static PetscErrorCode MatProductSymbolic_Htool(Mat C)
669: {
670: Mat_Product *product = C->product;
671: Mat A, B;
672: PetscBool flg;
674: PetscFunctionBegin;
675: MatCheckProduct(C, 1);
676: A = product->A;
677: B = product->B;
678: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, ""));
679: PetscCheck(flg && (product->type == MATPRODUCT_AB || product->type == MATPRODUCT_AtB), PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "ProductType %s not supported for %s", MatProductTypes[product->type], ((PetscObject)product->B)->type_name);
680: if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
681: if (product->type == MATPRODUCT_AB) PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
682: else PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, A->cmap->N, B->cmap->N));
683: }
684: PetscCall(MatSetType(C, MATDENSE));
685: PetscCall(MatSetUp(C));
686: PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
687: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
688: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
689: C->ops->productsymbolic = nullptr;
690: C->ops->productnumeric = MatProductNumeric_Htool;
691: PetscFunctionReturn(PETSC_SUCCESS);
692: }
694: static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
695: {
696: PetscFunctionBegin;
697: MatCheckProduct(C, 1);
698: if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
699: PetscFunctionReturn(PETSC_SUCCESS);
700: }
702: static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A, void *distributed_operator)
703: {
704: Mat_Htool *a;
706: PetscFunctionBegin;
707: PetscCall(MatShellGetContext(A, &a));
708: *(const void **)distributed_operator = static_cast<const void *>(a->distributed_operator);
709: PetscFunctionReturn(PETSC_SUCCESS);
710: }
712: static PetscErrorCode MatHtoolSetKernel_Htool(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
713: {
714: Mat_Htool *a;
716: PetscFunctionBegin;
717: PetscCall(MatShellGetContext(A, &a));
718: a->kernel = kernel;
719: a->kernelctx = kernelctx;
720: delete a->wrapper;
721: if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
722: PetscFunctionReturn(PETSC_SUCCESS);
723: }
725: static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A, IS *is)
726: {
727: Mat_Htool *a;
728: PetscMPIInt rank;
729: const std::vector<PetscInt> *source;
730: const htool::Cluster<PetscReal> *local_source_cluster;
732: PetscFunctionBegin;
733: PetscCall(MatShellGetContext(A, &a));
734: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
735: local_source_cluster = a->source_cluster ? &a->source_cluster->get_cluster_on_partition(rank) : &a->target_cluster->get_cluster_on_partition(rank);
736: source = &local_source_cluster->get_permutation();
737: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), local_source_cluster->get_size(), source->data() + local_source_cluster->get_offset(), PETSC_COPY_VALUES, is));
738: PetscCall(ISSetPermutation(*is));
739: PetscFunctionReturn(PETSC_SUCCESS);
740: }
742: static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A, IS *is)
743: {
744: Mat_Htool *a;
745: const std::vector<PetscInt> *target;
746: PetscMPIInt rank;
748: PetscFunctionBegin;
749: PetscCall(MatShellGetContext(A, &a));
750: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
751: target = &a->target_cluster->get_permutation();
752: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), a->target_cluster->get_cluster_on_partition(rank).get_size(), target->data() + a->target_cluster->get_cluster_on_partition(rank).get_offset(), PETSC_COPY_VALUES, is));
753: PetscCall(ISSetPermutation(*is));
754: PetscFunctionReturn(PETSC_SUCCESS);
755: }
757: static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A, PetscBool use)
758: {
759: Mat_Htool *a;
761: PetscFunctionBegin;
762: PetscCall(MatShellGetContext(A, &a));
763: a->permutation = use;
764: PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: static PetscErrorCode MatHtoolUseRecompression_Htool(Mat A, PetscBool use)
768: {
769: Mat_Htool *a;
771: PetscFunctionBegin;
772: PetscCall(MatShellGetContext(A, &a));
773: a->recompression = use;
774: PetscFunctionReturn(PETSC_SUCCESS);
775: }
777: #define PETSC_HTOOL_PARAMETER(Type, Name, member) \
778: static PetscErrorCode MatHtoolGet##Name##_Htool(Mat A, Type *v) \
779: { \
780: Mat_Htool *a; \
781: PetscFunctionBegin; \
782: PetscCall(MatShellGetContext(A, &a)); \
783: *v = a->member; \
784: PetscFunctionReturn(PETSC_SUCCESS); \
785: } \
786: static PetscErrorCode MatHtoolSet##Name##_Htool(Mat A, Type v) \
787: { \
788: Mat_Htool *a; \
789: PetscFunctionBegin; \
790: PetscCall(MatShellGetContext(A, &a)); \
791: a->member = v; \
792: PetscFunctionReturn(PETSC_SUCCESS); \
793: }
795: PETSC_HTOOL_PARAMETER(PetscReal, Epsilon, epsilon)
796: PETSC_HTOOL_PARAMETER(PetscReal, Eta, eta)
797: PETSC_HTOOL_PARAMETER(PetscInt, MaxClusterLeafSize, max_cluster_leaf_size)
798: PETSC_HTOOL_PARAMETER(PetscInt, MinTargetDepth, depth[0])
799: PETSC_HTOOL_PARAMETER(PetscInt, MinSourceDepth, depth[1])
800: PETSC_HTOOL_PARAMETER(PetscBool, BlockTreeConsistency, block_tree_consistency)
801: PETSC_HTOOL_PARAMETER(MatHtoolCompressorType, CompressorType, compressor)
802: PETSC_HTOOL_PARAMETER(MatHtoolClusteringType, ClusteringType, clustering)
804: static PetscErrorCode MatConvert_Htool_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
805: {
806: Mat C;
807: Mat_Htool *a;
808: PetscScalar *array, shift, scale;
809: PetscInt lda;
811: PetscFunctionBegin;
812: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
813: PetscCall(MatShellGetContext(A, &a));
814: if (reuse == MAT_REUSE_MATRIX) {
815: C = *B;
816: PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible dimensions");
817: PetscCall(MatDenseGetLDA(C, &lda));
818: PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
819: } else {
820: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
821: PetscCall(MatSetSizes(C, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
822: PetscCall(MatSetType(C, MATDENSE));
823: PetscCall(MatSetUp(C));
824: PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
825: }
826: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
827: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
828: PetscCall(MatDenseGetArrayWrite(C, &array));
829: PetscCallExternalVoid("copy_to_dense_in_user_numbering", htool::copy_to_dense_in_user_numbering(*a->local_hmatrix, array));
830: PetscCall(MatDenseRestoreArrayWrite(C, &array));
831: PetscCall(MatScale(C, scale));
832: PetscCall(MatShift(C, shift));
833: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
834: else *B = C;
835: PetscFunctionReturn(PETSC_SUCCESS);
836: }
838: static PetscErrorCode GenEntriesTranspose(PetscInt sdim, PetscInt M, PetscInt N, const PetscInt *rows, const PetscInt *cols, PetscScalar *ptr, PetscCtx ctx)
839: {
840: MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose *)ctx;
841: PetscScalar *tmp;
843: PetscFunctionBegin;
844: PetscCall(generator->kernel(sdim, N, M, cols, rows, ptr, generator->kernelctx));
845: PetscCall(PetscMalloc1(M * N, &tmp));
846: PetscCall(PetscArraycpy(tmp, ptr, M * N));
847: for (PetscInt i = 0; i < M; ++i) {
848: for (PetscInt j = 0; j < N; ++j) ptr[i + j * M] = tmp[j + i * N];
849: }
850: PetscCall(PetscFree(tmp));
851: PetscFunctionReturn(PETSC_SUCCESS);
852: }
854: /* naive implementation which keeps a reference to the original Mat */
855: static PetscErrorCode MatTranspose_Htool(Mat A, MatReuse reuse, Mat *B)
856: {
857: Mat C;
858: Mat_Htool *a, *c;
859: PetscScalar shift, scale;
860: PetscInt M = A->rmap->N, N = A->cmap->N, m = A->rmap->n, n = A->cmap->n;
861: PetscContainer container;
862: MatHtoolKernelTranspose *kernelt;
864: PetscFunctionBegin;
865: PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
866: PetscCall(MatShellGetContext(A, &a));
867: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
868: PetscCheck(reuse != MAT_INPLACE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatTranspose() with MAT_INPLACE_MATRIX not supported");
869: if (reuse == MAT_INITIAL_MATRIX) {
870: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
871: PetscCall(MatSetSizes(C, n, m, N, M));
872: PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
873: PetscCall(MatSetUp(C));
874: PetscCall(PetscNew(&kernelt));
875: PetscCall(PetscObjectContainerCompose((PetscObject)C, "KernelTranspose", kernelt, PetscCtxDestroyDefault));
876: } else {
877: C = *B;
878: PetscCall(PetscObjectQuery((PetscObject)C, "KernelTranspose", (PetscObject *)&container));
879: PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatTranspose() with MAT_INITIAL_MATRIX first");
880: PetscCall(PetscContainerGetPointer(container, &kernelt));
881: }
882: PetscCall(MatShellGetContext(C, &c));
883: c->dim = a->dim;
884: PetscCall(MatScale(C, scale));
885: PetscCall(MatShift(C, shift));
886: c->kernel = GenEntriesTranspose;
887: if (kernelt->A != A) {
888: PetscCall(MatDestroy(&kernelt->A));
889: kernelt->A = A;
890: PetscCall(PetscObjectReference((PetscObject)A));
891: }
892: kernelt->kernel = a->kernel;
893: kernelt->kernelctx = a->kernelctx;
894: c->kernelctx = kernelt;
895: c->max_cluster_leaf_size = a->max_cluster_leaf_size;
896: c->epsilon = a->epsilon;
897: c->eta = a->eta;
898: c->block_tree_consistency = a->block_tree_consistency;
899: c->permutation = a->permutation;
900: c->recompression = a->recompression;
901: c->compressor = a->compressor;
902: c->clustering = a->clustering;
903: if (reuse == MAT_INITIAL_MATRIX) {
904: PetscCall(PetscMalloc1(N * c->dim, &c->gcoords_target));
905: PetscCall(PetscArraycpy(c->gcoords_target, a->gcoords_source, N * c->dim));
906: if (a->gcoords_target != a->gcoords_source) {
907: PetscCall(PetscMalloc1(M * c->dim, &c->gcoords_source));
908: PetscCall(PetscArraycpy(c->gcoords_source, a->gcoords_target, M * c->dim));
909: } else c->gcoords_source = c->gcoords_target;
910: }
911: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
912: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
913: if (reuse == MAT_INITIAL_MATRIX) *B = C;
914: PetscFunctionReturn(PETSC_SUCCESS);
915: }
917: struct MatFactorCtx {
918: htool::HMatrix<PetscScalar> *hmatrix; /* factorized HMatrix filled by MatFactorNumeric_Htool() */
919: PetscScalar scale; /* scaling factor from MatShellGetScalingShifts(), applied as inverse scaling in Mat[Mat]Solve() */
920: };
922: static PetscErrorCode MatFactorCtxDestroy(PetscCtxRt ctx)
923: {
924: MatFactorCtx *data = *(MatFactorCtx **)ctx;
926: PetscFunctionBegin;
927: delete data->hmatrix;
928: PetscCall(PetscFree(data));
929: PetscFunctionReturn(PETSC_SUCCESS);
930: }
932: static PetscErrorCode MatDestroy_Factor(Mat F)
933: {
934: PetscFunctionBegin;
935: PetscCall(PetscObjectCompose((PetscObject)F, "HMatrix", nullptr));
936: PetscCall(PetscObjectComposeFunction((PetscObject)F, "MatFactorGetSolverType_C", nullptr));
937: PetscFunctionReturn(PETSC_SUCCESS);
938: }
940: static PetscErrorCode MatFactorGetSolverType_Htool(Mat, MatSolverType *type)
941: {
942: PetscFunctionBegin;
943: *type = MATSOLVERHTOOL;
944: PetscFunctionReturn(PETSC_SUCCESS);
945: }
947: template <char trans>
948: static inline PetscErrorCode MatSolve_Private(Mat A, htool::Matrix<PetscScalar> &X)
949: {
950: PetscContainer container;
951: MatFactorCtx *data;
953: PetscFunctionBegin;
954: PetscCall(PetscObjectQuery((PetscObject)A, "HMatrix", (PetscObject *)&container));
955: PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call Mat%sFactorNumeric() before Mat%sSolve%s()", A->factortype == MAT_FACTOR_LU ? "LU" : "Cholesky", X.nb_cols() == 1 ? "" : "Mat", trans == 'N' ? "" : "Transpose");
956: PetscCall(PetscContainerGetPointer(container, &data));
957: if (A->factortype == MAT_FACTOR_LU) PetscCallExternalVoid("lu_solve", htool::lu_solve(trans, *data->hmatrix, X));
958: else PetscCallExternalVoid("cholesky_solve", htool::cholesky_solve('U', *data->hmatrix, X));
959: PetscCallExternalVoid("scale", htool::scale(1.0 / data->scale, X));
960: PetscFunctionReturn(PETSC_SUCCESS);
961: }
963: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
964: static PetscErrorCode MatSolve_Htool(Mat A, Type b, Type x)
965: {
966: htool::Matrix<PetscScalar> v;
967: PetscScalar *array;
968: PetscInt n;
970: PetscFunctionBegin;
971: PetscCall(VecGetLocalSize(b, &n));
972: PetscCall(VecCopy(b, x));
973: PetscCall(VecGetArrayWrite(x, &array));
974: v.assign(n, 1, array, false);
975: PetscCall(VecRestoreArrayWrite(x, &array));
976: PetscCall(MatSolve_Private<trans>(A, v));
977: PetscFunctionReturn(PETSC_SUCCESS);
978: }
980: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
981: static PetscErrorCode MatSolve_Htool(Mat A, Type B, Type X)
982: {
983: htool::Matrix<PetscScalar> v;
984: PetscScalar *array;
985: PetscInt m, N, lda;
987: PetscFunctionBegin;
988: PetscCall(MatGetLocalSize(B, &m, nullptr));
989: PetscCall(MatGetLocalSize(B, nullptr, &N));
990: PetscCall(MatDenseGetLDA(X, &lda));
991: PetscCheck(lda == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, X->rmap->n);
992: PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
993: PetscCall(MatDenseGetArrayWrite(X, &array));
994: v.assign(m, N, array, false);
995: PetscCall(MatDenseRestoreArrayWrite(X, &array));
996: PetscCall(MatSolve_Private<trans>(A, v));
997: PetscFunctionReturn(PETSC_SUCCESS);
998: }
1000: template <MatFactorType ftype>
1001: static PetscErrorCode MatFactorNumeric_Htool(Mat F, Mat A, const MatFactorInfo *)
1002: {
1003: Mat_Htool *a;
1004: PetscContainer container;
1005: MatFactorCtx *data;
1007: PetscFunctionBegin;
1008: PetscCall(MatShellGetContext(A, &a));
1009: PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
1010: PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Mat%sFactorSymbolic() must be called before Mat%sFactorNumeric()", ftype == MAT_FACTOR_LU ? "LU" : "Cholesky", ftype == MAT_FACTOR_LU ? "LU" : "Cholesky");
1011: PetscCall(PetscContainerGetPointer(container, &data));
1012: delete data->hmatrix;
1013: data->hmatrix = new htool::HMatrix<PetscScalar>(*a->local_hmatrix);
1014: PetscCall(MatShellGetScalingShifts(A, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, &data->scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1015: if (ftype == MAT_FACTOR_LU) PetscCallExternalVoid("sequential_lu_factorization", htool::sequential_lu_factorization(*data->hmatrix));
1016: else PetscCallExternalVoid("sequential_cholesky_factorization", htool::sequential_cholesky_factorization('U', *data->hmatrix));
1017: PetscFunctionReturn(PETSC_SUCCESS);
1018: }
1020: template <MatFactorType ftype>
1021: PetscErrorCode MatFactorSymbolic_Htool(Mat F, Mat)
1022: {
1023: PetscContainer container;
1024: MatFactorCtx *data;
1026: PetscFunctionBegin;
1027: F->preallocated = PETSC_TRUE;
1028: F->assembled = PETSC_TRUE;
1029: F->ops->solve = MatSolve_Htool<'N', Vec>;
1030: F->ops->matsolve = MatSolve_Htool<'N', Mat>;
1031: if (!PetscDefined(USE_COMPLEX) || ftype == MAT_FACTOR_LU) {
1032: F->ops->solvetranspose = MatSolve_Htool<'T', Vec>;
1033: F->ops->matsolvetranspose = MatSolve_Htool<'T', Mat>;
1034: }
1035: F->ops->destroy = MatDestroy_Factor;
1036: if (ftype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_LU>;
1037: else F->ops->choleskyfactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_CHOLESKY>;
1038: PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
1039: if (!container) {
1040: PetscCall(PetscNew(&data));
1041: PetscCall(PetscObjectContainerCompose((PetscObject)F, "HMatrix", data, MatFactorCtxDestroy));
1042: }
1043: PetscFunctionReturn(PETSC_SUCCESS);
1044: }
1046: static PetscErrorCode MatLUFactorSymbolic_Htool(Mat F, Mat A, IS, IS, const MatFactorInfo *)
1047: {
1048: PetscFunctionBegin;
1049: PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_LU>(F, A));
1050: PetscFunctionReturn(PETSC_SUCCESS);
1051: }
1053: static PetscErrorCode MatCholeskyFactorSymbolic_Htool(Mat F, Mat A, IS, const MatFactorInfo *)
1054: {
1055: PetscFunctionBegin;
1056: PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_CHOLESKY>(F, A));
1057: PetscFunctionReturn(PETSC_SUCCESS);
1058: }
1060: static PetscErrorCode MatGetFactor_htool_htool(Mat A, MatFactorType ftype, Mat *F)
1061: {
1062: Mat B;
1063: Mat_Htool *a;
1064: PetscMPIInt size;
1066: PetscFunctionBegin;
1067: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1068: PetscCall(MatShellGetContext(A, &a));
1069: PetscCheck(size == 1, PetscObjectComm((PetscObject)A), PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel MatGetFactor()");
1070: PetscCheck(a->block_tree_consistency, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot factor a MatHtool with inconsistent block tree");
1071: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1072: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1073: PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &((PetscObject)B)->type_name));
1074: PetscCall(MatSetUp(B));
1076: B->ops->getinfo = MatGetInfo_External;
1077: B->factortype = ftype;
1078: B->trivialsymbolic = PETSC_TRUE;
1080: PetscCheck(ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_CHOLESKY, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_FACTOR_LU and MAT_FACTOR_CHOLESKY are supported");
1081: if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_Htool;
1082: else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_Htool;
1084: PetscCall(PetscFree(B->solvertype));
1085: PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &B->solvertype));
1087: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_Htool));
1088: *F = B;
1089: PetscFunctionReturn(PETSC_SUCCESS);
1090: }
1092: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Htool(void)
1093: {
1094: PetscFunctionBegin;
1095: PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_LU, MatGetFactor_htool_htool));
1096: PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_CHOLESKY, MatGetFactor_htool_htool));
1097: PetscFunctionReturn(PETSC_SUCCESS);
1098: }
1100: static PetscErrorCode MatHtoolCreateFromKernel_Htool(Mat A, PetscInt spacedim, const PetscReal coords_target[], const PetscReal coords_source[], MatHtoolKernelFn *kernel, void *kernelctx)
1101: {
1102: Mat_Htool *a;
1104: PetscFunctionBegin;
1105: PetscCall(MatShellGetContext(A, &a));
1106: a->dim = spacedim;
1107: a->kernel = kernel;
1108: a->kernelctx = kernelctx;
1109: PetscCall(PetscCalloc1(A->rmap->N * spacedim, &a->gcoords_target));
1110: PetscCall(PetscArraycpy(a->gcoords_target + A->rmap->rstart * spacedim, coords_target, A->rmap->n * spacedim));
1111: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_target, A->rmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1112: if (coords_target != coords_source) {
1113: PetscCall(PetscCalloc1(A->cmap->N * spacedim, &a->gcoords_source));
1114: PetscCall(PetscArraycpy(a->gcoords_source + A->cmap->rstart * spacedim, coords_source, A->cmap->n * spacedim));
1115: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_source, A->cmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1116: } else a->gcoords_source = a->gcoords_target;
1117: PetscFunctionReturn(PETSC_SUCCESS);
1118: }
1120: /*MC
1121: MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
1123: Use `./configure --download-htool` to install PETSc to use Htool.
1125: Options Database Key:
1126: . -mat_type htool - matrix type to `MATHTOOL`
1128: Level: beginner
1130: .seealso: [](ch_matrices), `Mat`, `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
1131: M*/
1132: PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
1133: {
1134: Mat_Htool *a;
1136: PetscFunctionBegin;
1137: PetscCall(MatSetType(A, MATSHELL));
1138: PetscCall(PetscNew(&a));
1139: PetscCall(MatShellSetContext(A, a));
1140: PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL, (PetscErrorCodeFn *)MatGetDiagonal_Htool));
1141: PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL_BLOCK, (PetscErrorCodeFn *)MatGetDiagonalBlock_Htool));
1142: PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Htool));
1143: PetscCall(MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1144: if (!PetscDefined(USE_COMPLEX)) PetscCall(MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1145: A->ops->increaseoverlap = MatIncreaseOverlap_Htool;
1146: A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
1147: PetscCall(MatShellSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_Htool));
1148: PetscCall(MatShellSetOperation(A, MATOP_SET_FROM_OPTIONS, (PetscErrorCodeFn *)MatSetFromOptions_Htool));
1149: PetscCall(MatShellSetOperation(A, MATOP_GET_ROW, (PetscErrorCodeFn *)MatGetRow_Htool));
1150: PetscCall(MatShellSetOperation(A, MATOP_RESTORE_ROW, (PetscErrorCodeFn *)MatRestoreRow_Htool));
1151: PetscCall(MatShellSetOperation(A, MATOP_ASSEMBLY_END, (PetscErrorCodeFn *)MatAssemblyEnd_Htool));
1152: PetscCall(MatShellSetOperation(A, MATOP_TRANSPOSE, (PetscErrorCodeFn *)MatTranspose_Htool));
1153: PetscCall(MatShellSetOperation(A, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Htool));
1154: a->dim = 0;
1155: a->gcoords_target = nullptr;
1156: a->gcoords_source = nullptr;
1157: a->max_cluster_leaf_size = 10;
1158: a->epsilon = PetscSqrtReal(PETSC_SMALL);
1159: a->eta = 10.0;
1160: a->depth[0] = 0;
1161: a->depth[1] = 0;
1162: a->block_tree_consistency = PETSC_TRUE;
1163: a->permutation = PETSC_TRUE;
1164: a->recompression = PETSC_FALSE;
1165: a->compressor = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
1166: a->distributed_operator = nullptr;
1167: a->block_diagonal_hmatrix = nullptr;
1168: a->local_hmatrix = nullptr;
1169: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", MatProductSetFromOptions_Htool));
1170: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", MatProductSetFromOptions_Htool));
1171: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", MatConvert_Htool_Dense));
1172: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", MatConvert_Htool_Dense));
1173: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", MatHtoolGetHierarchicalMat_Htool));
1174: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", MatHtoolSetKernel_Htool));
1175: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", MatHtoolGetPermutationSource_Htool));
1176: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", MatHtoolGetPermutationTarget_Htool));
1177: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", MatHtoolUsePermutation_Htool));
1178: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", MatHtoolUseRecompression_Htool));
1179: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", MatHtoolGetEpsilon_Htool));
1180: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", MatHtoolSetEpsilon_Htool));
1181: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", MatHtoolGetEta_Htool));
1182: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", MatHtoolSetEta_Htool));
1183: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", MatHtoolGetMaxClusterLeafSize_Htool));
1184: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", MatHtoolSetMaxClusterLeafSize_Htool));
1185: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", MatHtoolGetMinTargetDepth_Htool));
1186: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", MatHtoolSetMinTargetDepth_Htool));
1187: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", MatHtoolGetMinSourceDepth_Htool));
1188: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", MatHtoolSetMinSourceDepth_Htool));
1189: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", MatHtoolGetBlockTreeConsistency_Htool));
1190: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", MatHtoolSetBlockTreeConsistency_Htool));
1191: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", MatHtoolGetCompressorType_Htool));
1192: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", MatHtoolSetCompressorType_Htool));
1193: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", MatHtoolGetClusteringType_Htool));
1194: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", MatHtoolSetClusteringType_Htool));
1195: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", MatHtoolCreateFromKernel_Htool));
1196: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", MatShellSetContext_Immutable));
1197: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContextDestroy_C", MatShellSetContextDestroy_Immutable));
1198: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetManageScalingShifts_C", MatShellSetManageScalingShifts_Immutable));
1199: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATHTOOL));
1200: PetscFunctionReturn(PETSC_SUCCESS);
1201: }