Actual source code: ihtool.cxx

  1: #include <../src/mat/impls/htool/htool.hpp>
  2: #include <petscdraw.h>
  3: #include <set>

  5: const char *const MatHtoolCompressorTypes[] = {"sympartialACA", "fullACA", "SVD"};
  6: const char *const MatHtoolClusteringTypes[] = {"PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric"};
  7: const char       *HtoolCitations[2]         = {"@article{marchand2020two,\n"
  8:                                                "  Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
  9:                                                "  Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
 10:                                                "  Year = {2020},\n"
 11:                                                "  Publisher = {Elsevier},\n"
 12:                                                "  Journal = {Numerische Mathematik},\n"
 13:                                                "  Volume = {146},\n"
 14:                                                "  Pages = {597--628},\n"
 15:                                                "  Url = {https://github.com/htool-ddm/htool}\n"
 16:                                                "}\n",
 17:                                                "@article{Marchand2026,\n"
 18:                                                "  Author = {Marchand, Pierre and Tournier, Pierre-Henri and Jolivet, Pierre},\n"
 19:                                                "  Title = {{Htool-DDM}: A {C++} library for parallel solvers and compressed linear systems},\n"
 20:                                                "  Year = {2026},\n"
 21:                                                "  Publisher = {The Open Journal},\n"
 22:                                                "  Journal = {Journal of Open Source Software},\n"
 23:                                                "  Volume = {11},\n"
 24:                                                "  Number = {118},\n"
 25:                                                "  Pages = {9279},\n"
 26:                                                "  Url = {https://doi.org/10.21105/joss.09279}\n"
 27:                                                "}\n"};
 28: static PetscBool  HtoolCite[2]              = {PETSC_FALSE, PETSC_FALSE};

 30: static PetscErrorCode MatGetDiagonal_Htool(Mat A, Vec v)
 31: {
 32:   Mat_Htool   *a;
 33:   PetscScalar *x;
 34:   PetscBool    flg;

 36:   PetscFunctionBegin;
 37:   PetscCall(MatHasCongruentLayouts(A, &flg));
 38:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
 39:   PetscCall(MatShellGetContext(A, &a));
 40:   PetscCall(VecGetArrayWrite(v, &x));
 41:   PetscCallExternalVoid("copy_diagonal_in_user_numbering", htool::copy_diagonal_in_user_numbering(*a->block_diagonal_hmatrix, x));
 42:   PetscCall(VecRestoreArrayWrite(v, &x));
 43:   PetscFunctionReturn(PETSC_SUCCESS);
 44: }

 46: static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A, Mat *b)
 47: {
 48:   Mat_Htool  *a, *c;
 49:   Mat         B;
 50:   PetscScalar shift, scale;
 51:   PetscBool   flg;

 53:   PetscFunctionBegin;
 54:   PetscCall(MatHasCongruentLayouts(A, &flg));
 55:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
 56:   PetscCall(MatShellGetContext(A, &a));
 57:   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
 58:   if (!B) {
 59:     PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
 60:     PetscCheck(a->block_diagonal_hmatrix, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Block diagonal htool::HMatrix not found");
 61:     PetscCall(MatCreate(PETSC_COMM_SELF, &B));
 62:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
 63:     PetscCall(MatSetType(B, MATHTOOL));
 64:     PetscCall(MatSetUp(B));
 65:     B->assembled    = PETSC_TRUE;
 66:     B->preallocated = PETSC_TRUE;
 67:     PetscCall(MatShellGetContext(B, &c));
 68:     c->dim                     = a->dim;
 69:     c->epsilon                 = a->epsilon;
 70:     c->eta                     = a->eta;
 71:     c->block_tree_consistency  = a->block_tree_consistency;
 72:     c->permutation             = a->permutation;
 73:     c->recompression           = a->recompression;
 74:     c->compressor              = a->compressor;
 75:     c->clustering              = a->clustering;
 76:     c->local_to_local_operator = std::make_unique<htool::LocalToLocalHMatrix<PetscScalar>>(*a->block_diagonal_hmatrix);
 77:     c->distributed_operator_holder_wo_assembly = std::make_unique<htool::CustomApproximationBuilder<PetscScalar>>(a->block_diagonal_hmatrix->get_target_cluster(), a->block_diagonal_hmatrix->get_source_cluster(), PetscObjectComm((PetscObject)A), *c->local_to_local_operator);
 78:     c->distributed_operator   = &c->distributed_operator_holder_wo_assembly->distributed_operator;
 79:     c->block_diagonal_hmatrix = a->block_diagonal_hmatrix;
 80:     c->local_hmatrix          = a->block_diagonal_hmatrix;
 81:     PetscCall(MatPropagateSymmetryOptions(A, B));
 82:     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
 83:     *b = B;
 84:     PetscCall(MatDestroy(&B));
 85:     PetscCall(MatShift(*b, shift));
 86:     PetscCall(MatScale(*b, scale));
 87:   } else {
 88:     PetscCall(MatShellGetScalingShifts(A, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
 89:     *b = B;
 90:   }
 91:   PetscFunctionReturn(PETSC_SUCCESS);
 92: }

 94: static PetscErrorCode MatMult_Htool(Mat A, Vec x, Vec y)
 95: {
 96:   Mat_Htool         *a;
 97:   const PetscScalar *in;
 98:   PetscScalar       *out;

100:   PetscFunctionBegin;
101:   PetscCall(MatShellGetContext(A, &a));
102:   PetscCall(VecGetArrayRead(x, &in));
103:   PetscCall(VecGetArrayWrite(y, &out));
104:   if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
105:   else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
106:   PetscCall(VecRestoreArrayRead(x, &in));
107:   PetscCall(VecRestoreArrayWrite(y, &out));
108:   PetscFunctionReturn(PETSC_SUCCESS);
109: }

111: static PetscErrorCode MatMultTranspose_Htool(Mat A, Vec x, Vec y)
112: {
113:   Mat_Htool         *a;
114:   const PetscScalar *in;
115:   PetscScalar       *out;

117:   PetscFunctionBegin;
118:   PetscCall(MatShellGetContext(A, &a));
119:   PetscCall(VecGetArrayRead(x, &in));
120:   PetscCall(VecGetArrayWrite(y, &out));
121:   if (a->permutation) PetscCallExternalVoid("add_distributed_operator_vector_product_local_to_local", htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
122:   else PetscCallExternalVoid("internal_add_distributed_operator_vector_product_local_to_local", htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, nullptr));
123:   PetscCall(VecRestoreArrayRead(x, &in));
124:   PetscCall(VecRestoreArrayWrite(y, &out));
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: static PetscErrorCode MatIncreaseOverlap_Htool(Mat A, PetscInt is_max, IS is[], PetscInt ov)
129: {
130:   std::set<PetscInt> set;
131:   const PetscInt    *idx;
132:   PetscInt          *oidx, size, bs[2];
133:   PetscMPIInt        csize;

135:   PetscFunctionBegin;
136:   PetscCall(MatGetBlockSizes(A, bs, bs + 1));
137:   if (bs[0] != bs[1]) bs[0] = 1;
138:   for (PetscInt i = 0; i < is_max; ++i) {
139:     /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
140:     /* needed to avoid subdomain matrices to replicate A since it is dense                           */
141:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]), &csize));
142:     PetscCheck(csize == 1, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel IS");
143:     PetscCall(ISGetSize(is[i], &size));
144:     PetscCall(ISGetIndices(is[i], &idx));
145:     for (PetscInt j = 0; j < size; ++j) {
146:       set.insert(idx[j]);
147:       for (PetscInt k = 1; k <= ov; ++k) {                                              /* for each layer of overlap      */
148:         if (idx[j] - k >= 0) set.insert(idx[j] - k);                                    /* do not insert negative indices */
149:         if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
150:       }
151:     }
152:     PetscCall(ISRestoreIndices(is[i], &idx));
153:     PetscCall(ISDestroy(is + i));
154:     if (bs[0] > 1) {
155:       for (std::set<PetscInt>::iterator it = set.cbegin(); it != set.cend(); it++) {
156:         std::vector<PetscInt> block(bs[0]);
157:         std::iota(block.begin(), block.end(), (*it / bs[0]) * bs[0]);
158:         set.insert(block.cbegin(), block.cend());
159:       }
160:     }
161:     size = set.size(); /* size with overlap */
162:     PetscCall(PetscMalloc1(size, &oidx));
163:     for (const PetscInt j : set) *oidx++ = j;
164:     oidx -= size;
165:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, oidx, PETSC_OWN_POINTER, is + i));
166:   }
167:   PetscFunctionReturn(PETSC_SUCCESS);
168: }

170: static PetscErrorCode MatCreateSubMatrices_Htool(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
171: {
172:   Mat_Htool         *a;
173:   Mat                D, B, BT;
174:   const PetscScalar *copy;
175:   PetscScalar       *ptr, shift, scale;
176:   const PetscInt    *idxr, *idxc, *it;
177:   PetscInt           nrow, m, i;
178:   PetscBool          flg;

180:   PetscFunctionBegin;
181:   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
182:   PetscCall(MatShellGetContext(A, &a));
183:   if (scall != MAT_REUSE_MATRIX) PetscCall(PetscCalloc1(n, submat));
184:   for (i = 0; i < n; ++i) {
185:     PetscCall(ISGetLocalSize(irow[i], &nrow));
186:     PetscCall(ISGetLocalSize(icol[i], &m));
187:     PetscCall(ISGetIndices(irow[i], &idxr));
188:     PetscCall(ISGetIndices(icol[i], &idxc));
189:     if (scall != MAT_REUSE_MATRIX) PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow, m, nrow, m, nullptr, (*submat) + i));
190:     PetscCall(MatDenseGetArrayWrite((*submat)[i], &ptr));
191:     if (irow[i] == icol[i]) { /* same row and column IS? */
192:       PetscCall(MatHasCongruentLayouts(A, &flg));
193:       if (flg) {
194:         PetscCall(ISSorted(irow[i], &flg));
195:         if (flg) { /* sorted IS? */
196:           it = std::lower_bound(idxr, idxr + nrow, A->rmap->rstart);
197:           if (it != idxr + nrow && *it == A->rmap->rstart) {    /* rmap->rstart in IS? */
198:             if (std::distance(idxr, it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
199:               for (PetscInt j = 0; j < A->rmap->n && flg; ++j)
200:                 if (PetscUnlikely(it[j] != A->rmap->rstart + j)) flg = PETSC_FALSE;
201:               if (flg) { /* complete local diagonal block in IS? */
202:                 Mat dense;

204:                 /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
205:                  *      [   B   C   E   ]
206:                  *  A = [   B   D   E   ]
207:                  *      [   B   F   E   ]
208:                  */
209:                 m = std::distance(idxr, it); /* shift of the coefficient (0,0) of block D from above */
210:                 PetscCall(MatGetDiagonalBlock(A, &D));
211:                 PetscCall(MatConvert(D, MATDENSE, MAT_INITIAL_MATRIX, &dense));
212:                 PetscCall(MatDenseGetArrayRead(dense, &copy));
213:                 for (PetscInt k = 0; k < A->rmap->n; ++k) PetscCall(PetscArraycpy(ptr + (m + k) * nrow + m, copy + k * A->rmap->n, A->rmap->n)); /* block D from above */
214:                 PetscCall(MatDenseRestoreArrayRead(dense, &copy));
215:                 PetscCall(MatDestroy(&dense));
216:                 if (m) {
217:                   a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* vertical block B from above */
218:                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
219:                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
220:                     PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, m, A->rmap->n, m, ptr + m, &B));
221:                     PetscCall(MatDenseSetLDA(B, nrow));
222:                     PetscCall(MatCreateDense(PETSC_COMM_SELF, m, A->rmap->n, m, A->rmap->n, ptr + m * nrow, &BT));
223:                     PetscCall(MatDenseSetLDA(BT, nrow));
224:                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
225:                       PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
226:                     } else {
227:                       PetscCall(MatTransposeSetPrecursor(B, BT));
228:                       PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
229:                     }
230:                     PetscCall(MatDestroy(&B));
231:                     PetscCall(MatDestroy(&BT));
232:                   } else {
233:                     for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block C from above */
234:                       a->wrapper->copy_submatrix(m, 1, idxr, idxc + m + k, ptr + (m + k) * nrow);
235:                     }
236:                   }
237:                 }
238:                 if (m + A->rmap->n != nrow) {
239:                   a->wrapper->copy_submatrix(nrow, std::distance(it + A->rmap->n, idxr + nrow), idxr, idxc + m + A->rmap->n, ptr + (m + A->rmap->n) * nrow); /* vertical block E from above */
240:                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
241:                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
242:                     PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), ptr + (m + A->rmap->n) * nrow + m, &B));
243:                     PetscCall(MatDenseSetLDA(B, nrow));
244:                     PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, ptr + m * nrow + m + A->rmap->n, &BT));
245:                     PetscCall(MatDenseSetLDA(BT, nrow));
246:                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
247:                       PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
248:                     } else {
249:                       PetscCall(MatTransposeSetPrecursor(B, BT));
250:                       PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
251:                     }
252:                     PetscCall(MatDestroy(&B));
253:                     PetscCall(MatDestroy(&BT));
254:                   } else {
255:                     for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block F from above */
256:                       a->wrapper->copy_submatrix(std::distance(it + A->rmap->n, idxr + nrow), 1, it + A->rmap->n, idxc + m + k, ptr + (m + k) * nrow + m + A->rmap->n);
257:                     }
258:                   }
259:                 }
260:               } /* complete local diagonal block not in IS */
261:             } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
262:           } else flg = PETSC_FALSE;   /* rmap->rstart not in IS */
263:         } /* unsorted IS */
264:       }
265:     } else flg = PETSC_FALSE;                                       /* different row and column IS */
266:     if (!flg) a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* reassemble everything */
267:     PetscCall(ISRestoreIndices(irow[i], &idxr));
268:     PetscCall(ISRestoreIndices(icol[i], &idxc));
269:     PetscCall(MatDenseRestoreArrayWrite((*submat)[i], &ptr));
270:     PetscCall(MatShift((*submat)[i], shift));
271:     PetscCall(MatScale((*submat)[i], scale));
272:   }
273:   PetscFunctionReturn(PETSC_SUCCESS);
274: }

276: static PetscErrorCode MatDestroy_Htool(Mat A)
277: {
278:   Mat_Htool               *a;
279:   PetscContainer           container;
280:   MatHtoolKernelTranspose *kernelt;

282:   PetscFunctionBegin;
283:   PetscCall(MatShellGetContext(A, &a));
284:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", nullptr));
285:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", nullptr));
286:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", nullptr));
287:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", nullptr));
288:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", nullptr));
289:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", nullptr));
290:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", nullptr));
291:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", nullptr));
292:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", nullptr));
293:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", nullptr));
294:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", nullptr));
295:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", nullptr));
296:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", nullptr));
297:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", nullptr));
298:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", nullptr));
299:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", nullptr));
300:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", nullptr));
301:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", nullptr));
302:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", nullptr));
303:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", nullptr));
304:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", nullptr));
305:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", nullptr));
306:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", nullptr));
307:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", nullptr));
308:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", nullptr));
309:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", nullptr));
310:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", nullptr));
311:   PetscCall(PetscObjectQuery((PetscObject)A, "KernelTranspose", (PetscObject *)&container));
312:   if (container) { /* created in MatTranspose_Htool() */
313:     PetscCall(PetscContainerGetPointer(container, &kernelt));
314:     PetscCall(MatDestroy(&kernelt->A));
315:     PetscCall(PetscObjectCompose((PetscObject)A, "KernelTranspose", nullptr));
316:   }
317:   if (a->gcoords_source != a->gcoords_target) PetscCall(PetscFree(a->gcoords_source));
318:   PetscCall(PetscFree(a->gcoords_target));
319:   delete a->wrapper;
320:   a->target_cluster.reset();
321:   a->source_cluster.reset();
322:   a->distributed_operator_holder_w_assembly.reset();
323:   a->local_to_local_operator.reset();
324:   a->distributed_operator_holder_wo_assembly.reset();
325:   PetscCall(PetscFree(a));
326:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", nullptr)); // needed to avoid a call to MatShellSetContext_Immutable()
327:   PetscFunctionReturn(PETSC_SUCCESS);
328: }

330: static PetscErrorCode MatView_Htool_Draw_Zoom(PetscDraw draw, void *ptr)
331: {
332:   Mat                                                         A = (Mat)ptr;
333:   Mat_Htool                                                  *a;
334:   PetscReal                                                   x_r, y_r, x_l, y_l, w, h;
335:   PetscInt                                                    min_max[2] = {PETSC_INT_MAX, 0};
336:   const int                                                   greens[]   = {PETSC_DRAW_LIMEGREEN, PETSC_DRAW_FORESTGREEN, PETSC_DRAW_DARKGREEN};
337:   int                                                         color;
338:   char                                                        str[16];
339:   std::vector<const htool::HMatrix<PetscScalar, PetscReal> *> dense_blocks, low_rank_blocks;

341:   PetscFunctionBegin;
342:   PetscCall(MatShellGetContext(A, &a));
343:   PetscCallExternalVoid("get_leaves", htool::get_leaves(*a->local_hmatrix, dense_blocks, low_rank_blocks));
344:   for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
345:     const PetscInt rank = block->get_rank();

347:     if (rank < min_max[0]) min_max[0] = rank;
348:     if (rank > min_max[1]) min_max[1] = rank;
349:   }
350:   PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)A), min_max, min_max));
351:   if (min_max[0] == PETSC_INT_MAX) min_max[0] = min_max[1];
352:   PetscCall(PetscDrawStringGetSize(draw, &w, &h));
353:   PetscDrawCollectiveBegin(draw);
354:   for (const htool::HMatrix<PetscScalar, PetscReal> *block : dense_blocks) {
355:     x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
356:     x_r += (PetscReal)block->get_source_cluster().get_size();
357:     y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
358:     y_l -= (PetscReal)block->get_target_cluster().get_size();
359:     PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED, PETSC_DRAW_RED));
360:     PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
361:     PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
362:     PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
363:     PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
364:   }
365:   for (const htool::HMatrix<PetscScalar, PetscReal> *block : low_rank_blocks) {
366:     PetscReal      th;
367:     const PetscInt rank = block->get_rank();

369:     x_l = x_r = (PetscReal)block->get_source_cluster().get_offset();
370:     x_r += (PetscReal)block->get_source_cluster().get_size();
371:     y_l = y_r = (PetscReal)(A->rmap->N - block->get_target_cluster().get_offset());
372:     y_l -= (PetscReal)block->get_target_cluster().get_size();
373:     if (min_max[1] > min_max[0]) color = greens[(int)((PetscReal)(rank - min_max[0]) / (PetscReal)(min_max[1] - min_max[0]) * (PETSC_STATIC_ARRAY_LENGTH(greens) - 1) + 0.5)];
374:     else color = greens[PETSC_STATIC_ARRAY_LENGTH(greens) - 1];
375:     PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
376:     PetscCall(PetscDrawLine(draw, x_l, y_l, x_r, y_l, PETSC_DRAW_BLACK));
377:     PetscCall(PetscDrawLine(draw, x_r, y_l, x_r, y_r, PETSC_DRAW_BLACK));
378:     PetscCall(PetscDrawLine(draw, x_r, y_r, x_l, y_r, PETSC_DRAW_BLACK));
379:     PetscCall(PetscDrawLine(draw, x_l, y_r, x_l, y_l, PETSC_DRAW_BLACK));
380:     PetscCall(PetscSNPrintf(str, sizeof(str), "%d", rank));
381:     PetscCall(PetscDrawStringGetSize(draw, nullptr, &th));
382:     if (x_r - x_l > 4 * w && y_r - y_l > 4 * h) PetscCall(PetscDrawStringCentered(draw, 0.5 * (x_l + x_r), 0.5 * (y_l + y_r) - th / 2, PETSC_DRAW_BLACK, str));
383:   }
384:   PetscDrawCollectiveEnd(draw);
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: static PetscErrorCode MatView_Htool_Draw(Mat A, PetscViewer viewer)
389: {
390:   PetscDraw draw;
391:   PetscReal x_r = (PetscReal)A->cmap->N, y_r = (PetscReal)A->rmap->N, w, h;
392:   PetscBool flg;

394:   PetscFunctionBegin;
395:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
396:   PetscCall(PetscDrawIsNull(draw, &flg));
397:   if (flg) PetscFunctionReturn(PETSC_SUCCESS);
398:   w = x_r / 10.0;
399:   h = y_r / 10.0;
400:   PetscCall(PetscDrawSetCoordinates(draw, -w, -h, x_r + w, y_r + h));
401:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
402:   PetscCall(PetscDrawZoom(draw, MatView_Htool_Draw_Zoom, A));
403:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", nullptr));
404:   PetscCall(PetscDrawSave(draw));
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }

408: static PetscErrorCode MatView_Htool(Mat A, PetscViewer pv)
409: {
410:   Mat_Htool                         *a;
411:   PetscScalar                        shift, scale;
412:   PetscBool                          flg;
413:   std::map<std::string, std::string> hmatrix_information;

415:   PetscFunctionBegin;
416:   PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERDRAW, &flg));
417:   if (flg) PetscCall(MatView_Htool_Draw(A, pv));
418:   else {
419:     PetscCall(MatShellGetContext(A, &a));
420:     hmatrix_information = htool::get_distributed_hmatrix_information(*a->local_hmatrix, PetscObjectComm((PetscObject)A));
421:     PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &flg));
422:     if (flg) {
423:       PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
424:       PetscCall(PetscViewerASCIIPrintf(pv, "symmetry: %c\n", a->block_diagonal_hmatrix->get_symmetry()));
425:       if (PetscAbsScalar(scale - 1.0) > PETSC_MACHINE_EPSILON) {
426: #if defined(PETSC_USE_COMPLEX)
427:         PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g+%gi\n", (double)PetscRealPart(scale), (double)PetscImaginaryPart(scale)));
428: #else
429:         PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g\n", (double)scale));
430: #endif
431:       }
432:       if (PetscAbsScalar(shift) > PETSC_MACHINE_EPSILON) {
433: #if defined(PETSC_USE_COMPLEX)
434:         PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g+%gi\n", (double)PetscRealPart(shift), (double)PetscImaginaryPart(shift)));
435: #else
436:         PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g\n", (double)shift));
437: #endif
438:       }
439:       PetscCall(PetscViewerASCIIPrintf(pv, "maximal cluster leaf size: %" PetscInt_FMT "\n", a->max_cluster_leaf_size));
440:       PetscCall(PetscViewerASCIIPrintf(pv, "epsilon: %g\n", (double)a->epsilon));
441:       PetscCall(PetscViewerASCIIPrintf(pv, "eta: %g\n", (double)a->eta));
442:       PetscCall(PetscViewerASCIIPrintf(pv, "minimum target depth: %" PetscInt_FMT "\n", a->depth[0]));
443:       PetscCall(PetscViewerASCIIPrintf(pv, "minimum source depth: %" PetscInt_FMT "\n", a->depth[1]));
444:       PetscCall(PetscViewerASCIIPrintf(pv, "compressor: %s\n", MatHtoolCompressorTypes[a->compressor]));
445:       PetscCall(PetscViewerASCIIPrintf(pv, "clustering: %s\n", MatHtoolClusteringTypes[a->clustering]));
446:       PetscCall(PetscViewerASCIIPrintf(pv, "compression ratio: %s\n", hmatrix_information["Compression_ratio"].c_str()));
447:       PetscCall(PetscViewerASCIIPrintf(pv, "space saving: %s\n", hmatrix_information["Space_saving"].c_str()));
448:       PetscCall(PetscViewerASCIIPrintf(pv, "block tree consistency: %s\n", PetscBools[a->local_hmatrix->is_block_tree_consistent()]));
449:       PetscCall(PetscViewerASCIIPrintf(pv, "recompression: %s\n", PetscBools[a->recompression]));
450:       PetscCall(PetscViewerASCIIPrintf(pv, "number of dense (resp. low rank) matrices: %s (resp. %s)\n", hmatrix_information["Number_of_dense_blocks"].c_str(), hmatrix_information["Number_of_low_rank_blocks"].c_str()));
451:       PetscCall(
452:         PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n", hmatrix_information["Dense_block_size_min"].c_str(), hmatrix_information["Dense_block_size_mean"].c_str(), hmatrix_information["Dense_block_size_max"].c_str()));
453:       PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n", hmatrix_information["Low_rank_block_size_min"].c_str(), hmatrix_information["Low_rank_block_size_mean"].c_str(),
454:                                        hmatrix_information["Low_rank_block_size_max"].c_str()));
455:       PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) ranks: (%s, %s, %s)\n", hmatrix_information["Rank_min"].c_str(), hmatrix_information["Rank_mean"].c_str(), hmatrix_information["Rank_max"].c_str()));
456:     }
457:   }
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

461: /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
462: static PetscErrorCode MatGetRow_Htool(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
463: {
464:   Mat_Htool   *a;
465:   PetscScalar  shift, scale;
466:   PetscInt    *idxc;
467:   PetscBLASInt one = 1, bn;

469:   PetscFunctionBegin;
470:   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
471:   PetscCall(MatShellGetContext(A, &a));
472:   if (nz) *nz = A->cmap->N;
473:   if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
474:     PetscCall(PetscMalloc1(A->cmap->N, &idxc));
475:     for (PetscInt i = 0; i < A->cmap->N; ++i) idxc[i] = i;
476:   }
477:   if (idx) *idx = idxc;
478:   if (v) {
479:     PetscCall(PetscMalloc1(A->cmap->N, v));
480:     if (a->wrapper) a->wrapper->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
481:     else reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx)->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
482:     PetscCall(PetscBLASIntCast(A->cmap->N, &bn));
483:     PetscCallCXX(htool::Blas<PetscScalar>::scal(&bn, &scale, *v, &one));
484:     if (row < A->cmap->N) (*v)[row] += shift;
485:   }
486:   if (!idx) PetscCall(PetscFree(idxc));
487:   PetscFunctionReturn(PETSC_SUCCESS);
488: }

490: static PetscErrorCode MatRestoreRow_Htool(Mat, PetscInt, PetscInt *, PetscInt **idx, PetscScalar **v)
491: {
492:   PetscFunctionBegin;
493:   if (idx) PetscCall(PetscFree(*idx));
494:   if (v) PetscCall(PetscFree(*v));
495:   PetscFunctionReturn(PETSC_SUCCESS);
496: }

498: static PetscErrorCode MatSetFromOptions_Htool(Mat A, PetscOptionItems PetscOptionsObject)
499: {
500:   Mat_Htool *a;
501:   PetscInt   n;
502:   PetscBool  flg;

504:   PetscFunctionBegin;
505:   PetscCall(MatShellGetContext(A, &a));
506:   PetscOptionsHeadBegin(PetscOptionsObject, "Htool options");
507:   PetscCall(PetscOptionsBoundedInt("-mat_htool_max_cluster_leaf_size", "Maximal leaf size in cluster tree", nullptr, a->max_cluster_leaf_size, &a->max_cluster_leaf_size, nullptr, 0));
508:   PetscCall(PetscOptionsBoundedReal("-mat_htool_epsilon", "Relative error in Frobenius norm when approximating a block", nullptr, a->epsilon, &a->epsilon, nullptr, 0.0));
509:   PetscCall(PetscOptionsReal("-mat_htool_eta", "Admissibility condition tolerance", nullptr, a->eta, &a->eta, nullptr));
510:   PetscCall(PetscOptionsBoundedInt("-mat_htool_min_target_depth", "Minimal cluster tree depth associated with the rows", nullptr, a->depth[0], a->depth, nullptr, 0));
511:   PetscCall(PetscOptionsBoundedInt("-mat_htool_min_source_depth", "Minimal cluster tree depth associated with the columns", nullptr, a->depth[1], a->depth + 1, nullptr, 0));
512:   PetscCall(PetscOptionsBool("-mat_htool_block_tree_consistency", "Block tree consistency", nullptr, a->block_tree_consistency, &a->block_tree_consistency, nullptr));
513:   PetscCall(PetscOptionsBool("-mat_htool_recompression", "Use recompression", nullptr, a->recompression, &a->recompression, nullptr));

515:   n = 0;
516:   PetscCall(PetscOptionsEList("-mat_htool_compressor", "Type of compression", "MatHtoolCompressorType", MatHtoolCompressorTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes), MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA], &n, &flg));
517:   if (flg) a->compressor = MatHtoolCompressorType(n);
518:   n = 0;
519:   PetscCall(PetscOptionsEList("-mat_htool_clustering", "Type of clustering", "MatHtoolClusteringType", MatHtoolClusteringTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes), MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR], &n, &flg));
520:   if (flg) a->clustering = MatHtoolClusteringType(n);
521:   PetscOptionsHeadEnd();
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: static PetscErrorCode MatAssemblyEnd_Htool(Mat A, MatAssemblyType)
526: {
527:   Mat_Htool                                                           *a;
528:   const PetscInt                                                      *ranges;
529:   PetscInt                                                            *offset;
530:   PetscMPIInt                                                          size, rank;
531:   char                                                                 S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'), uplo = S == 'N' ? 'N' : 'U';
532:   htool::VirtualGenerator<PetscScalar>                                *generator = nullptr;
533:   htool::ClusterTreeBuilder<PetscReal>                                 recursive_build_strategy;
534:   htool::Cluster<PetscReal>                                           *source_cluster;
535:   std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> compressor;

537:   PetscFunctionBegin;
538:   for (size_t i = 0; i < PETSC_STATIC_ARRAY_LENGTH(HtoolCite); ++i) PetscCall(PetscCitationsRegister(HtoolCitations[i], HtoolCite + i));
539:   PetscCall(MatShellGetContext(A, &a));
540:   if (a->distributed_operator_holder_wo_assembly) PetscFunctionReturn(PETSC_SUCCESS);
541:   delete a->wrapper;
542:   a->target_cluster.reset();
543:   a->source_cluster.reset();
544:   a->distributed_operator_holder_w_assembly.reset();
545:   a->local_to_local_operator.reset();
546:   a->distributed_operator_holder_wo_assembly.reset();
547:   // clustering
548:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
549:   PetscCall(PetscMalloc1(2 * size, &offset));
550:   PetscCall(MatGetOwnershipRanges(A, &ranges));
551:   for (PetscInt i = 0; i < size; ++i) {
552:     offset[2 * i]     = ranges[i];
553:     offset[2 * i + 1] = ranges[i + 1] - ranges[i];
554:   }
555:   switch (a->clustering) {
556:   case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
557:     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
558:     break;
559:   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
560:     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
561:     break;
562:   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
563:     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
564:     break;
565:   default:
566:     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
567:   }
568:   recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
569:   a->target_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->rmap->N, a->dim, a->gcoords_target, 2, size, offset));
570:   if (a->gcoords_target != a->gcoords_source) {
571:     PetscCall(MatGetOwnershipRangesColumn(A, &ranges));
572:     for (PetscInt i = 0; i < size; ++i) {
573:       offset[2 * i]     = ranges[i];
574:       offset[2 * i + 1] = ranges[i + 1] - ranges[i];
575:     }
576:     switch (a->clustering) {
577:     case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
578:       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
579:       break;
580:     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
581:       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
582:       break;
583:     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
584:       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
585:       break;
586:     default:
587:       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
588:     }
589:     recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
590:     a->source_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->cmap->N, a->dim, a->gcoords_source, 2, size, offset));
591:     S = uplo       = 'N';
592:     source_cluster = a->source_cluster.get();
593:   } else source_cluster = a->target_cluster.get();
594:   PetscCall(PetscFree(offset));
595:   // generator
596:   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
597:   else {
598:     a->wrapper = nullptr;
599:     generator  = reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx);
600:   }
601:   // compressor
602:   switch (a->compressor) {
603:   case MAT_HTOOL_COMPRESSOR_FULL_ACA:
604:     compressor = std::make_shared<htool::fullACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
605:     break;
606:   case MAT_HTOOL_COMPRESSOR_SVD:
607:     compressor = std::make_shared<htool::SVD<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
608:     break;
609:   default:
610:     compressor = std::make_shared<htool::sympartialACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
611:   }
612:   // local hierarchical matrix
613:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
614:   auto hmatrix_builder = htool::HMatrixTreeBuilder<PetscScalar>(a->epsilon, a->eta, S, uplo);
615:   if (a->recompression) {
616:     std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> RecompressedLowRankGenerator = std::make_shared<htool::RecompressedLowRankGenerator<PetscScalar>>(*compressor, std::function<void(htool::LowRankMatrix<PetscScalar> &)>(htool::SVD_recompression<PetscScalar>));
617:     hmatrix_builder.set_low_rank_generator(RecompressedLowRankGenerator);
618:   } else {
619:     hmatrix_builder.set_low_rank_generator(compressor);
620:   }
621:   hmatrix_builder.set_minimal_target_depth(a->depth[0]);
622:   hmatrix_builder.set_minimal_source_depth(a->depth[1]);
623:   PetscCheck(a->block_tree_consistency || (!a->block_tree_consistency && !(A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE)), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot have a MatHtool with inconsistent block tree which is either symmetric or Hermitian");
624:   hmatrix_builder.set_block_tree_consistency(a->block_tree_consistency);
625:   a->distributed_operator_holder_w_assembly = std::make_unique<htool::DefaultApproximationBuilder<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, *a->target_cluster, *source_cluster, hmatrix_builder, PetscObjectComm((PetscObject)A));
626:   a->distributed_operator                   = &a->distributed_operator_holder_w_assembly->distributed_operator;
627:   a->block_diagonal_hmatrix                 = a->distributed_operator_holder_w_assembly->block_diagonal_hmatrix;
628:   a->local_hmatrix                          = &a->distributed_operator_holder_w_assembly->hmatrix;
629:   PetscFunctionReturn(PETSC_SUCCESS);
630: }

632: static PetscErrorCode MatProductNumeric_Htool(Mat C)
633: {
634:   Mat_Product       *product = C->product;
635:   Mat_Htool         *a;
636:   const PetscScalar *in;
637:   PetscScalar       *out;
638:   PetscInt           N, lda;

640:   PetscFunctionBegin;
641:   MatCheckProduct(C, 1);
642:   PetscCall(MatGetSize(C, nullptr, &N));
643:   PetscCall(MatDenseGetLDA(C, &lda));
644:   PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
645:   PetscCall(MatDenseGetArrayRead(product->B, &in));
646:   PetscCall(MatDenseGetArrayWrite(C, &out));
647:   PetscCall(MatShellGetContext(product->A, &a));
648:   switch (product->type) {
649:   case MATPRODUCT_AB:
650:     if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
651:     else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
652:     break;
653:   case MATPRODUCT_AtB:
654:     if (a->permutation) PetscCallExternalVoid("add_distributed_operator_matrix_product_local_to_local", htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
655:     else PetscCallExternalVoid("internal_add_distributed_operator_matrix_product_local_to_local", htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, *a->distributed_operator, in, 0.0, out, N, nullptr));
656:     break;
657:   default:
658:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductType %s is not supported", MatProductTypes[product->type]);
659:   }
660:   PetscCall(MatDenseRestoreArrayWrite(C, &out));
661:   PetscCall(MatDenseRestoreArrayRead(product->B, &in));
662:   PetscFunctionReturn(PETSC_SUCCESS);
663: }

665: static PetscErrorCode MatProductSymbolic_Htool(Mat C)
666: {
667:   Mat_Product *product = C->product;
668:   Mat          A, B;
669:   PetscBool    flg;

671:   PetscFunctionBegin;
672:   MatCheckProduct(C, 1);
673:   A = product->A;
674:   B = product->B;
675:   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, ""));
676:   PetscCheck(flg && (product->type == MATPRODUCT_AB || product->type == MATPRODUCT_AtB), PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "ProductType %s not supported for %s", MatProductTypes[product->type], ((PetscObject)product->B)->type_name);
677:   if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
678:     if (product->type == MATPRODUCT_AB) PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
679:     else PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, A->cmap->N, B->cmap->N));
680:   }
681:   PetscCall(MatSetType(C, MATDENSE));
682:   PetscCall(MatSetUp(C));
683:   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
684:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
685:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
686:   C->ops->productsymbolic = nullptr;
687:   C->ops->productnumeric  = MatProductNumeric_Htool;
688:   PetscFunctionReturn(PETSC_SUCCESS);
689: }

691: static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
692: {
693:   PetscFunctionBegin;
694:   MatCheckProduct(C, 1);
695:   if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
696:   PetscFunctionReturn(PETSC_SUCCESS);
697: }

699: static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A, void *distributed_operator)
700: {
701:   Mat_Htool *a;

703:   PetscFunctionBegin;
704:   PetscCall(MatShellGetContext(A, &a));
705:   *(const void **)distributed_operator = static_cast<const void *>(a->distributed_operator);
706:   PetscFunctionReturn(PETSC_SUCCESS);
707: }

709: static PetscErrorCode MatHtoolSetKernel_Htool(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
710: {
711:   Mat_Htool *a;

713:   PetscFunctionBegin;
714:   PetscCall(MatShellGetContext(A, &a));
715:   a->kernel    = kernel;
716:   a->kernelctx = kernelctx;
717:   delete a->wrapper;
718:   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
719:   PetscFunctionReturn(PETSC_SUCCESS);
720: }

722: static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A, IS *is)
723: {
724:   Mat_Htool                       *a;
725:   PetscMPIInt                      rank;
726:   const std::vector<PetscInt>     *source;
727:   const htool::Cluster<PetscReal> *local_source_cluster;

729:   PetscFunctionBegin;
730:   PetscCall(MatShellGetContext(A, &a));
731:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
732:   local_source_cluster = a->source_cluster ? &a->source_cluster->get_cluster_on_partition(rank) : &a->target_cluster->get_cluster_on_partition(rank);
733:   source               = &local_source_cluster->get_permutation();
734:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), local_source_cluster->get_size(), source->data() + local_source_cluster->get_offset(), PETSC_COPY_VALUES, is));
735:   PetscCall(ISSetPermutation(*is));
736:   PetscFunctionReturn(PETSC_SUCCESS);
737: }

739: static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A, IS *is)
740: {
741:   Mat_Htool                   *a;
742:   const std::vector<PetscInt> *target;
743:   PetscMPIInt                  rank;

745:   PetscFunctionBegin;
746:   PetscCall(MatShellGetContext(A, &a));
747:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
748:   target = &a->target_cluster->get_permutation();
749:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), a->target_cluster->get_cluster_on_partition(rank).get_size(), target->data() + a->target_cluster->get_cluster_on_partition(rank).get_offset(), PETSC_COPY_VALUES, is));
750:   PetscCall(ISSetPermutation(*is));
751:   PetscFunctionReturn(PETSC_SUCCESS);
752: }

754: static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A, PetscBool use)
755: {
756:   Mat_Htool *a;

758:   PetscFunctionBegin;
759:   PetscCall(MatShellGetContext(A, &a));
760:   a->permutation = use;
761:   PetscFunctionReturn(PETSC_SUCCESS);
762: }

764: static PetscErrorCode MatHtoolUseRecompression_Htool(Mat A, PetscBool use)
765: {
766:   Mat_Htool *a;

768:   PetscFunctionBegin;
769:   PetscCall(MatShellGetContext(A, &a));
770:   a->recompression = use;
771:   PetscFunctionReturn(PETSC_SUCCESS);
772: }

774: #define PETSC_HTOOL_PARAMETER(Type, Name, member) \
775:   static PetscErrorCode MatHtoolGet##Name##_Htool(Mat A, Type *v) \
776:   { \
777:     Mat_Htool *a; \
778:     PetscFunctionBegin; \
779:     PetscCall(MatShellGetContext(A, &a)); \
780:     *v = a->member; \
781:     PetscFunctionReturn(PETSC_SUCCESS); \
782:   } \
783:   static PetscErrorCode MatHtoolSet##Name##_Htool(Mat A, Type v) \
784:   { \
785:     Mat_Htool *a; \
786:     PetscFunctionBegin; \
787:     PetscCall(MatShellGetContext(A, &a)); \
788:     a->member = v; \
789:     PetscFunctionReturn(PETSC_SUCCESS); \
790:   }

792: PETSC_HTOOL_PARAMETER(PetscReal, Epsilon, epsilon)
793: PETSC_HTOOL_PARAMETER(PetscReal, Eta, eta)
794: PETSC_HTOOL_PARAMETER(PetscInt, MaxClusterLeafSize, max_cluster_leaf_size)
795: PETSC_HTOOL_PARAMETER(PetscInt, MinTargetDepth, depth[0])
796: PETSC_HTOOL_PARAMETER(PetscInt, MinSourceDepth, depth[1])
797: PETSC_HTOOL_PARAMETER(PetscBool, BlockTreeConsistency, block_tree_consistency)
798: PETSC_HTOOL_PARAMETER(MatHtoolCompressorType, CompressorType, compressor)
799: PETSC_HTOOL_PARAMETER(MatHtoolClusteringType, ClusteringType, clustering)

801: static PetscErrorCode MatConvert_Htool_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
802: {
803:   Mat          C;
804:   Mat_Htool   *a;
805:   PetscScalar *array, shift, scale;
806:   PetscInt     lda;

808:   PetscFunctionBegin;
809:   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
810:   PetscCall(MatShellGetContext(A, &a));
811:   if (reuse == MAT_REUSE_MATRIX) {
812:     C = *B;
813:     PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible dimensions");
814:     PetscCall(MatDenseGetLDA(C, &lda));
815:     PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
816:   } else {
817:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
818:     PetscCall(MatSetSizes(C, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
819:     PetscCall(MatSetType(C, MATDENSE));
820:     PetscCall(MatSetUp(C));
821:     PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
822:   }
823:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
824:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
825:   PetscCall(MatDenseGetArrayWrite(C, &array));
826:   PetscCallExternalVoid("copy_to_dense_in_user_numbering", htool::copy_to_dense_in_user_numbering(*a->local_hmatrix, array));
827:   PetscCall(MatDenseRestoreArrayWrite(C, &array));
828:   PetscCall(MatShift(C, shift));
829:   PetscCall(MatScale(C, scale));
830:   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
831:   else *B = C;
832:   PetscFunctionReturn(PETSC_SUCCESS);
833: }

835: static PetscErrorCode GenEntriesTranspose(PetscInt sdim, PetscInt M, PetscInt N, const PetscInt *rows, const PetscInt *cols, PetscScalar *ptr, PetscCtx ctx)
836: {
837:   MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose *)ctx;
838:   PetscScalar             *tmp;

840:   PetscFunctionBegin;
841:   PetscCall(generator->kernel(sdim, N, M, cols, rows, ptr, generator->kernelctx));
842:   PetscCall(PetscMalloc1(M * N, &tmp));
843:   PetscCall(PetscArraycpy(tmp, ptr, M * N));
844:   for (PetscInt i = 0; i < M; ++i) {
845:     for (PetscInt j = 0; j < N; ++j) ptr[i + j * M] = tmp[j + i * N];
846:   }
847:   PetscCall(PetscFree(tmp));
848:   PetscFunctionReturn(PETSC_SUCCESS);
849: }

851: /* naive implementation which keeps a reference to the original Mat */
852: static PetscErrorCode MatTranspose_Htool(Mat A, MatReuse reuse, Mat *B)
853: {
854:   Mat                      C;
855:   Mat_Htool               *a, *c;
856:   PetscScalar              shift, scale;
857:   PetscInt                 M = A->rmap->N, N = A->cmap->N, m = A->rmap->n, n = A->cmap->n;
858:   PetscContainer           container;
859:   MatHtoolKernelTranspose *kernelt;

861:   PetscFunctionBegin;
862:   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
863:   PetscCall(MatShellGetContext(A, &a));
864:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
865:   PetscCheck(reuse != MAT_INPLACE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatTranspose() with MAT_INPLACE_MATRIX not supported");
866:   if (reuse == MAT_INITIAL_MATRIX) {
867:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
868:     PetscCall(MatSetSizes(C, n, m, N, M));
869:     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
870:     PetscCall(MatSetUp(C));
871:     PetscCall(PetscNew(&kernelt));
872:     PetscCall(PetscObjectContainerCompose((PetscObject)C, "KernelTranspose", kernelt, PetscCtxDestroyDefault));
873:   } else {
874:     C = *B;
875:     PetscCall(PetscObjectQuery((PetscObject)C, "KernelTranspose", (PetscObject *)&container));
876:     PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatTranspose() with MAT_INITIAL_MATRIX first");
877:     PetscCall(PetscContainerGetPointer(container, &kernelt));
878:   }
879:   PetscCall(MatShellGetContext(C, &c));
880:   c->dim = a->dim;
881:   PetscCall(MatShift(C, shift));
882:   PetscCall(MatScale(C, scale));
883:   c->kernel = GenEntriesTranspose;
884:   if (kernelt->A != A) {
885:     PetscCall(MatDestroy(&kernelt->A));
886:     kernelt->A = A;
887:     PetscCall(PetscObjectReference((PetscObject)A));
888:   }
889:   kernelt->kernel           = a->kernel;
890:   kernelt->kernelctx        = a->kernelctx;
891:   c->kernelctx              = kernelt;
892:   c->max_cluster_leaf_size  = a->max_cluster_leaf_size;
893:   c->epsilon                = a->epsilon;
894:   c->eta                    = a->eta;
895:   c->block_tree_consistency = a->block_tree_consistency;
896:   c->permutation            = a->permutation;
897:   c->recompression          = a->recompression;
898:   c->compressor             = a->compressor;
899:   c->clustering             = a->clustering;
900:   if (reuse == MAT_INITIAL_MATRIX) {
901:     PetscCall(PetscMalloc1(N * c->dim, &c->gcoords_target));
902:     PetscCall(PetscArraycpy(c->gcoords_target, a->gcoords_source, N * c->dim));
903:     if (a->gcoords_target != a->gcoords_source) {
904:       PetscCall(PetscMalloc1(M * c->dim, &c->gcoords_source));
905:       PetscCall(PetscArraycpy(c->gcoords_source, a->gcoords_target, M * c->dim));
906:     } else c->gcoords_source = c->gcoords_target;
907:   }
908:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
909:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
910:   if (reuse == MAT_INITIAL_MATRIX) *B = C;
911:   PetscFunctionReturn(PETSC_SUCCESS);
912: }

914: static PetscErrorCode MatDestroy_Factor(Mat F)
915: {
916:   PetscContainer               container;
917:   htool::HMatrix<PetscScalar> *A;

919:   PetscFunctionBegin;
920:   PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
921:   if (container) {
922:     PetscCall(PetscContainerGetPointer(container, &A));
923:     delete A;
924:     PetscCall(PetscObjectCompose((PetscObject)F, "HMatrix", nullptr));
925:   }
926:   PetscCall(PetscObjectComposeFunction((PetscObject)F, "MatFactorGetSolverType_C", nullptr));
927:   PetscFunctionReturn(PETSC_SUCCESS);
928: }

930: static PetscErrorCode MatFactorGetSolverType_Htool(Mat, MatSolverType *type)
931: {
932:   PetscFunctionBegin;
933:   *type = MATSOLVERHTOOL;
934:   PetscFunctionReturn(PETSC_SUCCESS);
935: }

937: template <char trans>
938: static inline PetscErrorCode MatSolve_Private(Mat A, htool::Matrix<PetscScalar> &X)
939: {
940:   PetscContainer               container;
941:   htool::HMatrix<PetscScalar> *B;

943:   PetscFunctionBegin;
944:   PetscCheck(A->factortype == MAT_FACTOR_LU || A->factortype == MAT_FACTOR_CHOLESKY, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_UNKNOWN_TYPE, "Only MAT_LU_FACTOR and MAT_CHOLESKY_FACTOR are supported");
945:   PetscCall(PetscObjectQuery((PetscObject)A, "HMatrix", (PetscObject *)&container));
946:   PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call Mat%sFactorNumeric() before Mat%sSolve%s()", A->factortype == MAT_FACTOR_LU ? "LU" : "Cholesky", X.nb_cols() == 1 ? "" : "Mat", trans == 'N' ? "" : "Transpose");
947:   PetscCall(PetscContainerGetPointer(container, &B));
948:   if (A->factortype == MAT_FACTOR_LU) PetscCallExternalVoid("lu_solve", htool::lu_solve(trans, *B, X));
949:   else PetscCallExternalVoid("cholesky_solve", htool::cholesky_solve('U', *B, X));
950:   PetscFunctionReturn(PETSC_SUCCESS);
951: }

953: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
954: static PetscErrorCode MatSolve_Htool(Mat A, Type b, Type x)
955: {
956:   PetscInt                   n;
957:   htool::Matrix<PetscScalar> v;
958:   PetscScalar               *array;

960:   PetscFunctionBegin;
961:   PetscCall(VecGetLocalSize(b, &n));
962:   PetscCall(VecCopy(b, x));
963:   PetscCall(VecGetArrayWrite(x, &array));
964:   v.assign(n, 1, array, false);
965:   PetscCall(VecRestoreArrayWrite(x, &array));
966:   PetscCall(MatSolve_Private<trans>(A, v));
967:   PetscFunctionReturn(PETSC_SUCCESS);
968: }

970: template <char trans, class Type, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
971: static PetscErrorCode MatSolve_Htool(Mat A, Type B, Type X)
972: {
973:   PetscInt                   m, N;
974:   htool::Matrix<PetscScalar> v;
975:   PetscScalar               *array;

977:   PetscFunctionBegin;
978:   PetscCall(MatGetLocalSize(B, &m, nullptr));
979:   PetscCall(MatGetLocalSize(B, nullptr, &N));
980:   PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
981:   PetscCall(MatDenseGetArrayWrite(X, &array));
982:   v.assign(m, N, array, false);
983:   PetscCall(MatDenseRestoreArrayWrite(X, &array));
984:   PetscCall(MatSolve_Private<trans>(A, v));
985:   PetscFunctionReturn(PETSC_SUCCESS);
986: }

988: template <MatFactorType ftype>
989: static PetscErrorCode MatFactorNumeric_Htool(Mat F, Mat A, const MatFactorInfo *)
990: {
991:   Mat_Htool                   *a;
992:   htool::HMatrix<PetscScalar> *B;

994:   PetscFunctionBegin;
995:   PetscCall(MatShellGetContext(A, &a));
996:   B = new htool::HMatrix<PetscScalar>(*a->local_hmatrix);
997:   if (ftype == MAT_FACTOR_LU) PetscCallExternalVoid("sequential_lu_factorization", htool::sequential_lu_factorization(*B));
998:   else PetscCallExternalVoid("sequential_cholesky_factorization", htool::sequential_cholesky_factorization('U', *B));
999:   PetscCall(PetscObjectContainerCompose((PetscObject)F, "HMatrix", B, nullptr));
1000:   PetscFunctionReturn(PETSC_SUCCESS);
1001: }

1003: template <MatFactorType ftype>
1004: PetscErrorCode MatFactorSymbolic_Htool(Mat F, Mat)
1005: {
1006:   PetscFunctionBegin;
1007:   F->preallocated  = PETSC_TRUE;
1008:   F->assembled     = PETSC_TRUE;
1009:   F->ops->solve    = MatSolve_Htool<'N', Vec>;
1010:   F->ops->matsolve = MatSolve_Htool<'N', Mat>;
1011:   if (!PetscDefined(USE_COMPLEX) || ftype == MAT_FACTOR_LU) {
1012:     F->ops->solvetranspose    = MatSolve_Htool<'T', Vec>;
1013:     F->ops->matsolvetranspose = MatSolve_Htool<'T', Mat>;
1014:   }
1015:   F->ops->destroy = MatDestroy_Factor;
1016:   if (ftype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_LU>;
1017:   else F->ops->choleskyfactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_CHOLESKY>;
1018:   PetscFunctionReturn(PETSC_SUCCESS);
1019: }

1021: static PetscErrorCode MatLUFactorSymbolic_Htool(Mat F, Mat A, IS, IS, const MatFactorInfo *)
1022: {
1023:   PetscFunctionBegin;
1024:   PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_LU>(F, A));
1025:   PetscFunctionReturn(PETSC_SUCCESS);
1026: }

1028: static PetscErrorCode MatCholeskyFactorSymbolic_Htool(Mat F, Mat A, IS, const MatFactorInfo *)
1029: {
1030:   PetscFunctionBegin;
1031:   PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_CHOLESKY>(F, A));
1032:   PetscFunctionReturn(PETSC_SUCCESS);
1033: }

1035: static PetscErrorCode MatGetFactor_htool_htool(Mat A, MatFactorType ftype, Mat *F)
1036: {
1037:   Mat         B;
1038:   Mat_Htool  *a;
1039:   PetscMPIInt size;

1041:   PetscFunctionBegin;
1042:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1043:   PetscCall(MatShellGetContext(A, &a));
1044:   PetscCheck(size == 1, PetscObjectComm((PetscObject)A), PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel MatGetFactor()");
1045:   PetscCheck(a->block_tree_consistency, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot factor a MatHtool with inconsistent block tree");
1046:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1047:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1048:   PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &((PetscObject)B)->type_name));
1049:   PetscCall(MatSetUp(B));

1051:   B->ops->getinfo    = MatGetInfo_External;
1052:   B->factortype      = ftype;
1053:   B->trivialsymbolic = PETSC_TRUE;

1055:   if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_Htool;
1056:   else if (ftype == MAT_FACTOR_CHOLESKY) B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_Htool;

1058:   PetscCall(PetscFree(B->solvertype));
1059:   PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &B->solvertype));

1061:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_Htool));
1062:   *F = B;
1063:   PetscFunctionReturn(PETSC_SUCCESS);
1064: }

1066: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Htool(void)
1067: {
1068:   PetscFunctionBegin;
1069:   PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_LU, MatGetFactor_htool_htool));
1070:   PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_CHOLESKY, MatGetFactor_htool_htool));
1071:   PetscFunctionReturn(PETSC_SUCCESS);
1072: }

1074: static PetscErrorCode MatHtoolCreateFromKernel_Htool(Mat A, PetscInt spacedim, const PetscReal coords_target[], const PetscReal coords_source[], MatHtoolKernelFn *kernel, void *kernelctx)
1075: {
1076:   Mat_Htool *a;

1078:   PetscFunctionBegin;
1079:   PetscCall(MatShellGetContext(A, &a));
1080:   a->dim       = spacedim;
1081:   a->kernel    = kernel;
1082:   a->kernelctx = kernelctx;
1083:   PetscCall(PetscCalloc1(A->rmap->N * spacedim, &a->gcoords_target));
1084:   PetscCall(PetscArraycpy(a->gcoords_target + A->rmap->rstart * spacedim, coords_target, A->rmap->n * spacedim));
1085:   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_target, A->rmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1086:   if (coords_target != coords_source) {
1087:     PetscCall(PetscCalloc1(A->cmap->N * spacedim, &a->gcoords_source));
1088:     PetscCall(PetscArraycpy(a->gcoords_source + A->cmap->rstart * spacedim, coords_source, A->cmap->n * spacedim));
1089:     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_source, A->cmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A)));
1090:   } else a->gcoords_source = a->gcoords_target;
1091:   PetscFunctionReturn(PETSC_SUCCESS);
1092: }

1094: /*MC
1095:      MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.

1097:   Use `./configure --download-htool` to install PETSc to use Htool.

1099:    Options Database Key:
1100: .     -mat_type htool - matrix type to `MATHTOOL`

1102:    Level: beginner

1104: .seealso: [](ch_matrices), `Mat`, `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
1105: M*/
1106: PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
1107: {
1108:   Mat_Htool *a;

1110:   PetscFunctionBegin;
1111:   PetscCall(MatSetType(A, MATSHELL));
1112:   PetscCall(PetscNew(&a));
1113:   PetscCall(MatShellSetContext(A, a));
1114:   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL, (PetscErrorCodeFn *)MatGetDiagonal_Htool));
1115:   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL_BLOCK, (PetscErrorCodeFn *)MatGetDiagonalBlock_Htool));
1116:   PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Htool));
1117:   PetscCall(MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1118:   if (!PetscDefined(USE_COMPLEX)) PetscCall(MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1119:   A->ops->increaseoverlap   = MatIncreaseOverlap_Htool;
1120:   A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
1121:   PetscCall(MatShellSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_Htool));
1122:   PetscCall(MatShellSetOperation(A, MATOP_SET_FROM_OPTIONS, (PetscErrorCodeFn *)MatSetFromOptions_Htool));
1123:   PetscCall(MatShellSetOperation(A, MATOP_GET_ROW, (PetscErrorCodeFn *)MatGetRow_Htool));
1124:   PetscCall(MatShellSetOperation(A, MATOP_RESTORE_ROW, (PetscErrorCodeFn *)MatRestoreRow_Htool));
1125:   PetscCall(MatShellSetOperation(A, MATOP_ASSEMBLY_END, (PetscErrorCodeFn *)MatAssemblyEnd_Htool));
1126:   PetscCall(MatShellSetOperation(A, MATOP_TRANSPOSE, (PetscErrorCodeFn *)MatTranspose_Htool));
1127:   PetscCall(MatShellSetOperation(A, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Htool));
1128:   a->dim                    = 0;
1129:   a->gcoords_target         = nullptr;
1130:   a->gcoords_source         = nullptr;
1131:   a->max_cluster_leaf_size  = 10;
1132:   a->epsilon                = PetscSqrtReal(PETSC_SMALL);
1133:   a->eta                    = 10.0;
1134:   a->depth[0]               = 0;
1135:   a->depth[1]               = 0;
1136:   a->block_tree_consistency = PETSC_TRUE;
1137:   a->permutation            = PETSC_TRUE;
1138:   a->recompression          = PETSC_FALSE;
1139:   a->compressor             = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
1140:   a->distributed_operator   = nullptr;
1141:   a->block_diagonal_hmatrix = nullptr;
1142:   a->local_hmatrix          = nullptr;
1143:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", MatProductSetFromOptions_Htool));
1144:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", MatProductSetFromOptions_Htool));
1145:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", MatConvert_Htool_Dense));
1146:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", MatConvert_Htool_Dense));
1147:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", MatHtoolGetHierarchicalMat_Htool));
1148:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", MatHtoolSetKernel_Htool));
1149:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", MatHtoolGetPermutationSource_Htool));
1150:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", MatHtoolGetPermutationTarget_Htool));
1151:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", MatHtoolUsePermutation_Htool));
1152:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", MatHtoolUseRecompression_Htool));
1153:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEpsilon_C", MatHtoolGetEpsilon_Htool));
1154:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEpsilon_C", MatHtoolSetEpsilon_Htool));
1155:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetEta_C", MatHtoolGetEta_Htool));
1156:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetEta_C", MatHtoolSetEta_Htool));
1157:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMaxClusterLeafSize_C", MatHtoolGetMaxClusterLeafSize_Htool));
1158:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMaxClusterLeafSize_C", MatHtoolSetMaxClusterLeafSize_Htool));
1159:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinTargetDepth_C", MatHtoolGetMinTargetDepth_Htool));
1160:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinTargetDepth_C", MatHtoolSetMinTargetDepth_Htool));
1161:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetMinSourceDepth_C", MatHtoolGetMinSourceDepth_Htool));
1162:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetMinSourceDepth_C", MatHtoolSetMinSourceDepth_Htool));
1163:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetBlockTreeConsistency_C", MatHtoolGetBlockTreeConsistency_Htool));
1164:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetBlockTreeConsistency_C", MatHtoolSetBlockTreeConsistency_Htool));
1165:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetCompressorType_C", MatHtoolGetCompressorType_Htool));
1166:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetCompressorType_C", MatHtoolSetCompressorType_Htool));
1167:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetClusteringType_C", MatHtoolGetClusteringType_Htool));
1168:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetClusteringType_C", MatHtoolSetClusteringType_Htool));
1169:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolCreateFromKernel_C", MatHtoolCreateFromKernel_Htool));
1170:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", MatShellSetContext_Immutable));
1171:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContextDestroy_C", MatShellSetContextDestroy_Immutable));
1172:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetManageScalingShifts_C", MatShellSetManageScalingShifts_Immutable));
1173:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATHTOOL));
1174:   PetscFunctionReturn(PETSC_SUCCESS);
1175: }