Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <petscdm.h>
4: #include <h2opusconf.h>
6: /* Use GPU only if H2OPUS is configured for GPU */
7: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
8: #define PETSC_H2OPUS_USE_GPU
9: #endif
11: typedef struct {
12: Mat A;
13: Mat M;
14: PetscScalar s0;
16: /* sampler for Newton-Schultz */
17: Mat S;
18: PetscInt hyperorder;
19: Vec wns[4];
20: Mat wnsmat[4];
22: /* convergence testing */
23: Mat T;
24: Vec w;
26: /* Support for PCSetCoordinates */
27: PetscInt sdim;
28: PetscInt nlocc;
29: PetscReal *coords;
31: /* Newton-Schultz customization */
32: PetscInt maxits;
33: PetscReal rtol, atol;
34: PetscBool monitor;
35: PetscBool useapproximatenorms;
36: NormType normtype;
38: /* Used when pmat != MATH2OPUS */
39: PetscReal eta;
40: PetscInt leafsize;
41: PetscInt max_rank;
42: PetscInt bs;
43: PetscReal mrtol;
45: /* CPU/GPU */
46: PetscBool forcecpu;
47: PetscBool boundtocpu;
48: } PC_H2OPUS;
50: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
52: static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc)
53: {
54: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55: DM dm;
56: PetscBool isdmda;
58: PetscFunctionBegin;
59: if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS);
60: PetscCall(PCGetDM(pc, &dm));
61: if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm));
62: PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda));
63: if (isdmda) {
64: Vec c;
65: const PetscScalar *coords;
66: PetscInt n, sdim;
68: PetscCall(DMGetCoordinates(dm, &c));
69: PetscCall(DMGetDimension(dm, &sdim));
70: PetscCall(VecGetLocalSize(c, &n));
71: PetscCall(VecGetArrayRead(c, &coords));
72: PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords));
73: PetscCall(VecRestoreArrayRead(c, &coords));
74: }
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: static PetscErrorCode PCReset_H2OPUS(PC pc)
79: {
80: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
82: PetscFunctionBegin;
83: pch2opus->sdim = 0;
84: pch2opus->nlocc = 0;
85: PetscCall(PetscFree(pch2opus->coords));
86: PetscFunctionReturn(PETSC_SUCCESS);
87: }
89: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
90: {
91: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
92: PetscBool reset = PETSC_TRUE;
94: PetscFunctionBegin;
95: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
96: PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
97: reset = (PetscBool)!reset;
98: }
99: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
100: if (reset) {
101: PetscCall(PCReset_H2OPUS(pc));
102: PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
103: PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
104: pch2opus->sdim = sdim;
105: pch2opus->nlocc = nlocc;
106: }
107: PetscFunctionReturn(PETSC_SUCCESS);
108: }
110: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
111: {
112: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
114: PetscFunctionBegin;
115: PetscCall(PCReset_H2OPUS(pc));
116: PetscCall(MatDestroy(&pch2opus->A));
117: PetscCall(MatDestroy(&pch2opus->M));
118: PetscCall(MatDestroy(&pch2opus->T));
119: PetscCall(VecDestroy(&pch2opus->w));
120: PetscCall(MatDestroy(&pch2opus->S));
121: PetscCall(VecDestroy(&pch2opus->wns[0]));
122: PetscCall(VecDestroy(&pch2opus->wns[1]));
123: PetscCall(VecDestroy(&pch2opus->wns[2]));
124: PetscCall(VecDestroy(&pch2opus->wns[3]));
125: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
126: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
127: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
128: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
129: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
130: PetscCall(PetscFree(pc->data));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
134: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
135: {
136: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
138: PetscFunctionBegin;
139: PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
140: PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
141: PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
142: PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
143: PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
144: PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
145: PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
146: PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
147: PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
148: PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
149: PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
150: PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
151: PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
152: PetscOptionsHeadEnd();
153: PetscFunctionReturn(PETSC_SUCCESS);
154: }
156: typedef struct {
157: Mat A;
158: Mat M;
159: Vec w;
160: } AAtCtx;
162: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
163: {
164: AAtCtx *aat;
166: PetscFunctionBegin;
167: PetscCall(MatShellGetContext(A, (void *)&aat));
168: /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
169: PetscCall(MatMultTranspose(aat->A, x, aat->w));
170: PetscCall(MatMult(aat->A, aat->w, y));
171: PetscFunctionReturn(PETSC_SUCCESS);
172: }
174: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
175: {
176: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
178: PetscInt M, m;
179: VecType vtype;
180: PetscReal n;
181: AAtCtx aat;
183: PetscFunctionBegin;
184: aat.A = A;
185: aat.M = pch2opus->M; /* unused so far */
186: PetscCall(MatCreateVecs(A, NULL, &aat.w));
187: PetscCall(MatGetSize(A, &M, NULL));
188: PetscCall(MatGetLocalSize(A, &m, NULL));
189: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
190: PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
191: PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt));
192: PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt));
193: PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
194: PetscCall(MatGetVecType(A, &vtype));
195: PetscCall(MatShellSetVecType(AAt, vtype));
196: PetscCall(MatNorm(AAt, NORM_1, &n));
197: pch2opus->s0 = 1. / n;
198: PetscCall(VecDestroy(&aat.w));
199: PetscCall(MatDestroy(&AAt));
200: PetscFunctionReturn(PETSC_SUCCESS);
201: }
203: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
204: {
205: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
207: PetscFunctionBegin;
208: if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
209: else PetscCall(MatMult(pch2opus->M, x, y));
210: PetscFunctionReturn(PETSC_SUCCESS);
211: }
213: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
214: {
215: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
217: PetscFunctionBegin;
218: if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
219: else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
224: {
225: PetscFunctionBegin;
226: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
227: PetscFunctionReturn(PETSC_SUCCESS);
228: }
230: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
231: {
232: PetscFunctionBegin;
233: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
238: {
239: PetscFunctionBegin;
240: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
241: PetscFunctionReturn(PETSC_SUCCESS);
242: }
244: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
245: {
246: PetscFunctionBegin;
247: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: /* used to test the norm of (M^-1 A - I) */
252: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
253: {
254: PC pc;
255: Mat A;
256: PC_H2OPUS *pch2opus;
257: PetscBool sideleft = PETSC_TRUE;
259: PetscFunctionBegin;
260: PetscCall(MatShellGetContext(M, (void *)&pc));
261: pch2opus = (PC_H2OPUS *)pc->data;
262: if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
263: A = pch2opus->A;
264: PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
265: if (t) {
266: if (sideleft) {
267: PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
268: PetscCall(MatMultTranspose(A, pch2opus->w, y));
269: } else {
270: PetscCall(MatMultTranspose(A, x, pch2opus->w));
271: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
272: }
273: } else {
274: if (sideleft) {
275: PetscCall(MatMult(A, x, pch2opus->w));
276: PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
277: } else {
278: PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
279: PetscCall(MatMult(A, pch2opus->w, y));
280: }
281: }
282: PetscCall(VecAXPY(y, -1.0, x));
283: PetscFunctionReturn(PETSC_SUCCESS);
284: }
286: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
287: {
288: PetscFunctionBegin;
289: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
294: {
295: PetscFunctionBegin;
296: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
297: PetscFunctionReturn(PETSC_SUCCESS);
298: }
300: /* HyperPower kernel:
301: Y = R = x
302: for i = 1 . . . l - 1 do
303: R = (I - A * Xk) * R
304: Y = Y + R
305: Y = Xk * Y
306: */
307: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
308: {
309: PC pc;
310: Mat A;
311: PC_H2OPUS *pch2opus;
313: PetscFunctionBegin;
314: PetscCall(MatShellGetContext(M, (void *)&pc));
315: pch2opus = (PC_H2OPUS *)pc->data;
316: A = pch2opus->A;
317: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
318: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
319: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
320: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
321: PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
322: PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
323: PetscCall(VecCopy(x, pch2opus->wns[0]));
324: PetscCall(VecCopy(x, pch2opus->wns[3]));
325: if (t) {
326: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
327: PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
328: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
329: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
330: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
331: }
332: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
333: } else {
334: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
335: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
336: PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
337: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
338: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
339: }
340: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
341: }
342: PetscFunctionReturn(PETSC_SUCCESS);
343: }
345: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
346: {
347: PetscFunctionBegin;
348: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
349: PetscFunctionReturn(PETSC_SUCCESS);
350: }
352: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
353: {
354: PetscFunctionBegin;
355: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: /* Hyper power kernel, MatMat version */
360: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
361: {
362: PC pc;
363: Mat A;
364: PC_H2OPUS *pch2opus;
365: PetscInt i;
367: PetscFunctionBegin;
368: PetscCall(MatShellGetContext(M, (void *)&pc));
369: pch2opus = (PC_H2OPUS *)pc->data;
370: A = pch2opus->A;
371: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
372: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
373: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
374: }
375: if (!pch2opus->wnsmat[0]) {
376: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
377: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
378: }
379: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
380: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
381: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
382: }
383: if (!pch2opus->wnsmat[2]) {
384: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
385: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
386: }
387: PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
388: PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
389: if (t) {
390: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
391: PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
392: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
393: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
394: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
395: }
396: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
397: } else {
398: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
399: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
400: PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2]));
401: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
402: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
403: }
404: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
405: }
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }
409: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
410: {
411: PetscFunctionBegin;
412: PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
413: PetscFunctionReturn(PETSC_SUCCESS);
414: }
416: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
417: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
418: {
419: PC pc;
420: Mat A;
421: PC_H2OPUS *pch2opus;
423: PetscFunctionBegin;
424: PetscCall(MatShellGetContext(M, (void *)&pc));
425: pch2opus = (PC_H2OPUS *)pc->data;
426: A = pch2opus->A;
427: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
428: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
429: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
430: if (t) {
431: PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
432: PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
433: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
434: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
435: } else {
436: PetscCall(PCApply_H2OPUS(pc, x, y));
437: PetscCall(MatMult(A, y, pch2opus->wns[0]));
438: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
439: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
440: }
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
444: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
445: {
446: PetscFunctionBegin;
447: PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
448: PetscFunctionReturn(PETSC_SUCCESS);
449: }
451: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
452: {
453: PetscFunctionBegin;
454: PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
455: PetscFunctionReturn(PETSC_SUCCESS);
456: }
458: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
459: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
460: {
461: PC pc;
462: Mat A;
463: PC_H2OPUS *pch2opus;
465: PetscFunctionBegin;
466: PetscCall(MatShellGetContext(M, (void *)&pc));
467: pch2opus = (PC_H2OPUS *)pc->data;
468: A = pch2opus->A;
469: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
470: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
471: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
472: }
473: if (!pch2opus->wnsmat[0]) {
474: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
475: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
476: }
477: if (t) {
478: PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
479: PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
480: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
481: PetscCall(MatScale(Y, 2.));
482: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
483: } else {
484: PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
485: PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0]));
486: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
487: PetscCall(MatScale(Y, 2.));
488: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
489: }
490: PetscFunctionReturn(PETSC_SUCCESS);
491: }
493: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
494: {
495: PetscFunctionBegin;
496: PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
497: PetscFunctionReturn(PETSC_SUCCESS);
498: }
500: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
501: {
502: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
503: Mat A = pc->useAmat ? pc->mat : pc->pmat;
505: PetscFunctionBegin;
506: if (!pch2opus->S) {
507: PetscInt M, N, m, n;
509: PetscCall(MatGetSize(A, &M, &N));
510: PetscCall(MatGetLocalSize(A, &m, &n));
511: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
512: PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
513: #if defined(PETSC_H2OPUS_USE_GPU)
514: PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
515: #endif
516: }
517: if (pch2opus->hyperorder >= 2) {
518: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper));
519: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper));
520: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
521: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
522: } else {
523: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS));
524: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS));
525: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
526: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
527: }
528: PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
529: PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
530: /* XXX */
531: PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
532: PetscFunctionReturn(PETSC_SUCCESS);
533: }
535: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
536: {
537: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
538: Mat A = pc->useAmat ? pc->mat : pc->pmat;
539: NormType norm = pch2opus->normtype;
540: PetscReal initerr = 0.0, err;
541: PetscBool ish2opus;
543: PetscFunctionBegin;
544: if (!pch2opus->T) {
545: PetscInt M, N, m, n;
546: const char *prefix;
548: PetscCall(PCGetOptionsPrefix(pc, &prefix));
549: PetscCall(MatGetSize(A, &M, &N));
550: PetscCall(MatGetLocalSize(A, &m, &n));
551: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
552: PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
553: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI));
554: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI));
555: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
556: #if defined(PETSC_H2OPUS_USE_GPU)
557: PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
558: #endif
559: PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
560: PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
561: }
562: PetscCall(MatDestroy(&pch2opus->A));
563: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
564: if (ish2opus) {
565: PetscCall(PetscObjectReference((PetscObject)A));
566: pch2opus->A = A;
567: } else {
568: const char *prefix;
570: /* See if we can infer coordinates from the DM */
571: if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc));
572: PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
573: /* XXX */
574: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
575: PetscCall(PCGetOptionsPrefix(pc, &prefix));
576: PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
577: PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
578: PetscCall(MatSetFromOptions(pch2opus->A));
579: PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
580: PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
581: /* XXX */
582: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
584: /* always perform construction on the GPU unless forcecpu is true */
585: PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
586: }
587: #if defined(PETSC_H2OPUS_USE_GPU)
588: pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
589: #endif
590: PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
591: if (pch2opus->M) { /* see if we can reuse M as initial guess */
592: PetscReal reuse;
594: PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
595: PetscCall(MatNorm(pch2opus->T, norm, &reuse));
596: if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
597: }
598: if (!pch2opus->M) {
599: const char *prefix;
600: PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
601: PetscCall(PCGetOptionsPrefix(pc, &prefix));
602: PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
603: PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
604: PetscCall(MatSetFromOptions(pch2opus->M));
605: PetscCall(PCH2OpusSetUpInit(pc));
606: PetscCall(MatScale(pch2opus->M, pch2opus->s0));
607: }
608: /* A and M have the same h2 matrix structure, save on reordering routines */
609: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
610: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
611: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
612: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
613: err = initerr;
614: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
615: if (initerr > pch2opus->atol && !pc->failedreason) {
616: PetscInt i;
618: PetscCall(PCH2OpusSetUpSampler_Private(pc));
619: for (i = 0; i < pch2opus->maxits; i++) {
620: Mat M;
621: const char *prefix;
623: PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
624: PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
625: PetscCall(MatSetOptionsPrefix(M, prefix));
626: PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
627: PetscCall(MatSetFromOptions(M));
628: PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
629: PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
630: PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
631: PetscCall(MatDestroy(&pch2opus->M));
632: pch2opus->M = M;
633: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
634: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
635: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
636: if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
637: }
638: }
639: /* cleanup setup workspace */
640: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
641: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
642: PetscCall(VecDestroy(&pch2opus->wns[0]));
643: PetscCall(VecDestroy(&pch2opus->wns[1]));
644: PetscCall(VecDestroy(&pch2opus->wns[2]));
645: PetscCall(VecDestroy(&pch2opus->wns[3]));
646: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
647: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
648: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
649: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
650: PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
654: {
655: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
656: PetscBool isascii;
658: PetscFunctionBegin;
659: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
660: if (isascii) {
661: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
662: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
663: PetscCall(PetscViewerASCIIPushTab(viewer));
664: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
665: PetscCall(MatView(pch2opus->A, viewer));
666: PetscCall(PetscViewerPopFormat(viewer));
667: PetscCall(PetscViewerASCIIPopTab(viewer));
668: }
669: if (pch2opus->M) {
670: PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
671: PetscCall(PetscViewerASCIIPushTab(viewer));
672: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
673: PetscCall(MatView(pch2opus->M, viewer));
674: PetscCall(PetscViewerPopFormat(viewer));
675: PetscCall(PetscViewerASCIIPopTab(viewer));
676: }
677: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
678: }
679: PetscFunctionReturn(PETSC_SUCCESS);
680: }
682: /*MC
683: PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
685: Options Database Keys:
686: + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
687: . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
688: . -pc_h2opus_monitor - monitor Newton-Schultz convergence
689: . -pc_h2opus_atol - absolute tolerance
690: . -pc_h2opus_rtol - relative tolerance
691: . -pc_h2opus_norm_type - normtype
692: . -pc_h2opus_hyperorder - Hyper power order of sampling
693: . -pc_h2opus_leafsize - leaf size when constructed from kernel
694: . -pc_h2opus_eta - admissibility condition tolerance
695: . -pc_h2opus_maxrank - maximum rank when constructed from matvecs
696: . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
697: . -pc_h2opus_mrtol - relative tolerance for construction from sampling
698: - -pc_h2opus_forcecpu - force construction of preconditioner on CPU
700: Level: intermediate
702: .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
703: M*/
705: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
706: {
707: PC_H2OPUS *pch2opus;
709: PetscFunctionBegin;
710: PetscCall(PetscNew(&pch2opus));
711: pc->data = (void *)pch2opus;
713: pch2opus->atol = 1.e-2;
714: pch2opus->rtol = 1.e-6;
715: pch2opus->maxits = 50;
716: pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
717: pch2opus->normtype = NORM_2;
719: /* these are needed when we are sampling the pmat */
720: pch2opus->eta = PETSC_DECIDE;
721: pch2opus->leafsize = PETSC_DECIDE;
722: pch2opus->max_rank = PETSC_DECIDE;
723: pch2opus->bs = PETSC_DECIDE;
724: pch2opus->mrtol = PETSC_DECIDE;
725: #if defined(PETSC_H2OPUS_USE_GPU)
726: pch2opus->boundtocpu = PETSC_FALSE;
727: #else
728: pch2opus->boundtocpu = PETSC_TRUE;
729: #endif
730: pc->ops->destroy = PCDestroy_H2OPUS;
731: pc->ops->setup = PCSetUp_H2OPUS;
732: pc->ops->apply = PCApply_H2OPUS;
733: pc->ops->matapply = PCApplyMat_H2OPUS;
734: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
735: pc->ops->reset = PCReset_H2OPUS;
736: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
737: pc->ops->view = PCView_H2OPUS;
739: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
740: PetscFunctionReturn(PETSC_SUCCESS);
741: }