Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <petscdm.h>
4: #include <h2opusconf.h>
6: /* Use GPU only if H2OPUS is configured for GPU */
7: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
8: #define PETSC_H2OPUS_USE_GPU
9: #endif
11: typedef struct {
12: Mat A;
13: Mat M;
14: PetscScalar s0;
16: /* sampler for Newton-Schultz */
17: Mat S;
18: PetscInt hyperorder;
19: Vec wns[4];
20: Mat wnsmat[4];
22: /* convergence testing */
23: Mat T;
24: Vec w;
26: /* Support for PCSetCoordinates */
27: PetscInt sdim;
28: PetscInt nlocc;
29: PetscReal *coords;
31: /* Newton-Schultz customization */
32: PetscInt maxits;
33: PetscReal rtol, atol;
34: PetscBool monitor;
35: PetscBool useapproximatenorms;
36: NormType normtype;
38: /* Used when pmat != MATH2OPUS */
39: PetscReal eta;
40: PetscInt leafsize;
41: PetscInt max_rank;
42: PetscInt bs;
43: PetscReal mrtol;
45: /* CPU/GPU */
46: PetscBool forcecpu;
47: PetscBool boundtocpu;
48: } PC_H2OPUS;
50: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
52: static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc)
53: {
54: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55: DM dm;
56: PetscBool isdmda;
58: PetscFunctionBegin;
59: if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS);
60: PetscCall(PCGetDM(pc, &dm));
61: if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm));
62: PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda));
63: if (isdmda) {
64: Vec c;
65: const PetscScalar *coords;
66: PetscInt n, sdim;
68: PetscCall(DMGetCoordinates(dm, &c));
69: PetscCall(DMGetDimension(dm, &sdim));
70: PetscCall(VecGetLocalSize(c, &n));
71: PetscCall(VecGetArrayRead(c, &coords));
72: PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords));
73: PetscCall(VecRestoreArrayRead(c, &coords));
74: }
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: static PetscErrorCode PCReset_H2OPUS(PC pc)
79: {
80: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
82: PetscFunctionBegin;
83: pch2opus->sdim = 0;
84: pch2opus->nlocc = 0;
85: PetscCall(PetscFree(pch2opus->coords));
86: PetscFunctionReturn(PETSC_SUCCESS);
87: }
89: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
90: {
91: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
92: PetscBool reset = PETSC_TRUE;
94: PetscFunctionBegin;
95: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
96: PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
97: reset = (PetscBool)!reset;
98: }
99: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
100: if (reset) {
101: PetscCall(PCReset_H2OPUS(pc));
102: PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
103: PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
104: pch2opus->sdim = sdim;
105: pch2opus->nlocc = nlocc;
106: }
107: PetscFunctionReturn(PETSC_SUCCESS);
108: }
110: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
111: {
112: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
114: PetscFunctionBegin;
115: PetscCall(PCReset_H2OPUS(pc));
116: PetscCall(MatDestroy(&pch2opus->A));
117: PetscCall(MatDestroy(&pch2opus->M));
118: PetscCall(MatDestroy(&pch2opus->T));
119: PetscCall(VecDestroy(&pch2opus->w));
120: PetscCall(MatDestroy(&pch2opus->S));
121: PetscCall(VecDestroy(&pch2opus->wns[0]));
122: PetscCall(VecDestroy(&pch2opus->wns[1]));
123: PetscCall(VecDestroy(&pch2opus->wns[2]));
124: PetscCall(VecDestroy(&pch2opus->wns[3]));
125: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
126: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
127: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
128: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
129: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
130: PetscCall(PetscFree(pc->data));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
134: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems PetscOptionsObject)
135: {
136: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
138: PetscFunctionBegin;
139: PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
140: PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
141: PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
142: PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
143: PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
144: PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
145: PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
146: PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
147: PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
148: PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
149: PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
150: PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
151: PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
152: PetscOptionsHeadEnd();
153: PetscFunctionReturn(PETSC_SUCCESS);
154: }
156: typedef struct {
157: Mat A;
158: Mat M;
159: Vec w;
160: } AAtCtx;
162: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
163: {
164: AAtCtx *aat;
166: PetscFunctionBegin;
167: PetscCall(MatShellGetContext(A, &aat));
168: /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
169: PetscCall(MatMultTranspose(aat->A, x, aat->w));
170: PetscCall(MatMult(aat->A, aat->w, y));
171: PetscFunctionReturn(PETSC_SUCCESS);
172: }
174: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
175: {
176: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
178: PetscInt M, m;
179: VecType vtype;
180: PetscReal n;
181: AAtCtx aat;
183: PetscFunctionBegin;
184: aat.A = A;
185: aat.M = pch2opus->M; /* unused so far */
186: PetscCall(MatCreateVecs(A, NULL, &aat.w));
187: PetscCall(MatGetSize(A, &M, NULL));
188: PetscCall(MatGetLocalSize(A, &m, NULL));
189: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
190: PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
191: PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (PetscErrorCodeFn *)MatMult_AAt));
192: PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_AAt));
193: PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
194: PetscCall(MatGetVecType(A, &vtype));
195: PetscCall(MatShellSetVecType(AAt, vtype));
196: PetscCall(MatNorm(AAt, NORM_1, &n));
197: pch2opus->s0 = 1. / n;
198: PetscCall(VecDestroy(&aat.w));
199: PetscCall(MatDestroy(&AAt));
200: PetscFunctionReturn(PETSC_SUCCESS);
201: }
203: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
204: {
205: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
207: PetscFunctionBegin;
208: if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
209: else PetscCall(MatMult(pch2opus->M, x, y));
210: PetscFunctionReturn(PETSC_SUCCESS);
211: }
213: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
214: {
215: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
217: PetscFunctionBegin;
218: if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
219: else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
224: {
225: PetscFunctionBegin;
226: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
227: PetscFunctionReturn(PETSC_SUCCESS);
228: }
230: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
231: {
232: PetscFunctionBegin;
233: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
238: {
239: PetscFunctionBegin;
240: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
241: PetscFunctionReturn(PETSC_SUCCESS);
242: }
244: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
245: {
246: PetscFunctionBegin;
247: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: /* used to test the norm of (M^-1 A - I) */
252: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
253: {
254: PC pc;
255: Mat A;
256: PC_H2OPUS *pch2opus;
257: PetscBool sideleft = PETSC_TRUE;
259: PetscFunctionBegin;
260: PetscCall(MatShellGetContext(M, &pc));
261: pch2opus = (PC_H2OPUS *)pc->data;
262: if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
263: A = pch2opus->A;
264: PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
265: if (t) {
266: if (sideleft) {
267: PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
268: PetscCall(MatMultTranspose(A, pch2opus->w, y));
269: } else {
270: PetscCall(MatMultTranspose(A, x, pch2opus->w));
271: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
272: }
273: } else {
274: if (sideleft) {
275: PetscCall(MatMult(A, x, pch2opus->w));
276: PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
277: } else {
278: PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
279: PetscCall(MatMult(A, pch2opus->w, y));
280: }
281: }
282: PetscCall(VecAXPY(y, -1.0, x));
283: PetscFunctionReturn(PETSC_SUCCESS);
284: }
286: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
287: {
288: PetscFunctionBegin;
289: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
294: {
295: PetscFunctionBegin;
296: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
297: PetscFunctionReturn(PETSC_SUCCESS);
298: }
300: /* HyperPower kernel:
301: Y = R = x
302: for i = 1 . . . l - 1 do
303: R = (I - A * Xk) * R
304: Y = Y + R
305: Y = Xk * Y
306: */
307: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
308: {
309: PC pc;
310: Mat A;
311: PC_H2OPUS *pch2opus;
313: PetscFunctionBegin;
314: PetscCall(MatShellGetContext(M, &pc));
315: pch2opus = (PC_H2OPUS *)pc->data;
316: A = pch2opus->A;
317: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
318: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
319: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
320: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
321: PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
322: PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
323: PetscCall(VecCopy(x, pch2opus->wns[0]));
324: PetscCall(VecCopy(x, pch2opus->wns[3]));
325: if (t) {
326: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
327: PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
328: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
329: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
330: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
331: }
332: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
333: } else {
334: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
335: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
336: PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
337: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
338: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
339: }
340: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
341: }
342: PetscFunctionReturn(PETSC_SUCCESS);
343: }
345: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
346: {
347: PetscFunctionBegin;
348: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
349: PetscFunctionReturn(PETSC_SUCCESS);
350: }
352: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
353: {
354: PetscFunctionBegin;
355: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: /* Hyper power kernel, MatMat version */
360: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
361: {
362: PC pc;
363: Mat A;
364: PC_H2OPUS *pch2opus;
366: PetscFunctionBegin;
367: PetscCall(MatShellGetContext(M, &pc));
368: pch2opus = (PC_H2OPUS *)pc->data;
369: A = pch2opus->A;
370: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
371: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
372: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
373: }
374: if (!pch2opus->wnsmat[0]) {
375: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
376: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
377: }
378: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
379: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
380: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
381: }
382: if (!pch2opus->wnsmat[2]) {
383: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
384: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
385: }
386: PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
387: PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
388: if (t) {
389: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
390: PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
391: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
392: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
393: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
394: }
395: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
396: } else {
397: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
398: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
399: PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2]));
400: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
401: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
402: }
403: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
404: }
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, PetscCtx ctx)
409: {
410: PetscFunctionBegin;
411: PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
415: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
416: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
417: {
418: PC pc;
419: Mat A;
420: PC_H2OPUS *pch2opus;
422: PetscFunctionBegin;
423: PetscCall(MatShellGetContext(M, &pc));
424: pch2opus = (PC_H2OPUS *)pc->data;
425: A = pch2opus->A;
426: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
427: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
428: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
429: if (t) {
430: PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
431: PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
432: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
433: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
434: } else {
435: PetscCall(PCApply_H2OPUS(pc, x, y));
436: PetscCall(MatMult(A, y, pch2opus->wns[0]));
437: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
438: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
439: }
440: PetscFunctionReturn(PETSC_SUCCESS);
441: }
443: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
444: {
445: PetscFunctionBegin;
446: PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
447: PetscFunctionReturn(PETSC_SUCCESS);
448: }
450: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
451: {
452: PetscFunctionBegin;
453: PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
458: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
459: {
460: PC pc;
461: Mat A;
462: PC_H2OPUS *pch2opus;
464: PetscFunctionBegin;
465: PetscCall(MatShellGetContext(M, &pc));
466: pch2opus = (PC_H2OPUS *)pc->data;
467: A = pch2opus->A;
468: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
469: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
470: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
471: }
472: if (!pch2opus->wnsmat[0]) {
473: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
474: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
475: }
476: if (t) {
477: PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
478: PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
479: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
480: PetscCall(MatScale(Y, 2.));
481: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
482: } else {
483: PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
484: PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0]));
485: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
486: PetscCall(MatScale(Y, 2.));
487: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
488: }
489: PetscFunctionReturn(PETSC_SUCCESS);
490: }
492: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, PetscCtx ctx)
493: {
494: PetscFunctionBegin;
495: PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
496: PetscFunctionReturn(PETSC_SUCCESS);
497: }
499: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
500: {
501: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
502: Mat A = pc->useAmat ? pc->mat : pc->pmat;
504: PetscFunctionBegin;
505: if (!pch2opus->S) {
506: PetscInt M, N, m, n;
508: PetscCall(MatGetSize(A, &M, &N));
509: PetscCall(MatGetLocalSize(A, &m, &n));
510: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
511: PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
512: #if defined(PETSC_H2OPUS_USE_GPU)
513: PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
514: #endif
515: }
516: if (pch2opus->hyperorder >= 2) {
517: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Hyper));
518: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Hyper));
519: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
520: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
521: } else {
522: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_NS));
523: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_NS));
524: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
525: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
526: }
527: PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
528: PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
529: /* XXX */
530: PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
531: PetscFunctionReturn(PETSC_SUCCESS);
532: }
534: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
535: {
536: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
537: Mat A = pc->useAmat ? pc->mat : pc->pmat;
538: NormType norm = pch2opus->normtype;
539: PetscReal initerr = 0.0, err;
540: PetscBool ish2opus;
542: PetscFunctionBegin;
543: if (!pch2opus->T) {
544: PetscInt M, N, m, n;
545: const char *prefix;
547: PetscCall(PCGetOptionsPrefix(pc, &prefix));
548: PetscCall(MatGetSize(A, &M, &N));
549: PetscCall(MatGetLocalSize(A, &m, &n));
550: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
551: PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
552: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (PetscErrorCodeFn *)MatMult_MAmI));
553: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_MAmI));
554: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
555: #if defined(PETSC_H2OPUS_USE_GPU)
556: PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
557: #endif
558: PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
559: PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
560: }
561: PetscCall(MatDestroy(&pch2opus->A));
562: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
563: if (ish2opus) {
564: PetscCall(PetscObjectReference((PetscObject)A));
565: pch2opus->A = A;
566: } else {
567: const char *prefix;
569: /* See if we can infer coordinates from the DM */
570: if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc));
571: PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
572: /* XXX */
573: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
574: PetscCall(PCGetOptionsPrefix(pc, &prefix));
575: PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
576: PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
577: PetscCall(MatSetFromOptions(pch2opus->A));
578: PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
579: PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
580: /* XXX */
581: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
583: /* always perform construction on the GPU unless forcecpu is true */
584: PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
585: }
586: #if defined(PETSC_H2OPUS_USE_GPU)
587: pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
588: #endif
589: PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
590: if (pch2opus->M) { /* see if we can reuse M as initial guess */
591: PetscReal reuse;
593: PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
594: PetscCall(MatNorm(pch2opus->T, norm, &reuse));
595: if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
596: }
597: if (!pch2opus->M) {
598: const char *prefix;
599: PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
600: PetscCall(PCGetOptionsPrefix(pc, &prefix));
601: PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
602: PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
603: PetscCall(MatSetFromOptions(pch2opus->M));
604: PetscCall(PCH2OpusSetUpInit(pc));
605: PetscCall(MatScale(pch2opus->M, pch2opus->s0));
606: }
607: /* A and M have the same h2 matrix nonzero structure, save on reordering routines */
608: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
609: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
610: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
611: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
612: err = initerr;
613: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
614: if (initerr > pch2opus->atol && !pc->failedreason) {
615: PetscCall(PCH2OpusSetUpSampler_Private(pc));
616: for (PetscInt i = 0; i < pch2opus->maxits; i++) {
617: Mat M;
618: const char *prefix;
620: PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
621: PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
622: PetscCall(MatSetOptionsPrefix(M, prefix));
623: PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
624: PetscCall(MatSetFromOptions(M));
625: PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
626: PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
627: PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
628: PetscCall(MatDestroy(&pch2opus->M));
629: pch2opus->M = M;
630: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
631: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
632: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
633: if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
634: }
635: }
636: /* cleanup setup workspace */
637: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
638: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
639: PetscCall(VecDestroy(&pch2opus->wns[0]));
640: PetscCall(VecDestroy(&pch2opus->wns[1]));
641: PetscCall(VecDestroy(&pch2opus->wns[2]));
642: PetscCall(VecDestroy(&pch2opus->wns[3]));
643: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
644: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
645: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
646: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
650: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
651: {
652: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
653: PetscBool isascii;
655: PetscFunctionBegin;
656: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
657: if (isascii) {
658: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
659: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
660: PetscCall(PetscViewerASCIIPushTab(viewer));
661: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
662: PetscCall(MatView(pch2opus->A, viewer));
663: PetscCall(PetscViewerPopFormat(viewer));
664: PetscCall(PetscViewerASCIIPopTab(viewer));
665: }
666: if (pch2opus->M) {
667: PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
668: PetscCall(PetscViewerASCIIPushTab(viewer));
669: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
670: PetscCall(MatView(pch2opus->M, viewer));
671: PetscCall(PetscViewerPopFormat(viewer));
672: PetscCall(PetscViewerASCIIPopTab(viewer));
673: }
674: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
675: }
676: PetscFunctionReturn(PETSC_SUCCESS);
677: }
679: /*MC
680: PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
682: Options Database Keys:
683: + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
684: . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
685: . -pc_h2opus_monitor - monitor Newton-Schultz convergence
686: . -pc_h2opus_atol - absolute tolerance
687: . -pc_h2opus_rtol - relative tolerance
688: . -pc_h2opus_norm_type - normtype
689: . -pc_h2opus_hyperorder - Hyper power order of sampling
690: . -pc_h2opus_leafsize - leaf size when constructed from kernel
691: . -pc_h2opus_eta - admissibility condition tolerance
692: . -pc_h2opus_maxrank - maximum rank when constructed from matvecs
693: . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
694: . -pc_h2opus_mrtol - relative tolerance for construction from sampling
695: - -pc_h2opus_forcecpu - force construction of preconditioner on CPU
697: Level: intermediate
699: .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
700: M*/
702: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
703: {
704: PC_H2OPUS *pch2opus;
706: PetscFunctionBegin;
707: PetscCall(PetscNew(&pch2opus));
708: pc->data = (void *)pch2opus;
710: pch2opus->atol = 1.e-2;
711: pch2opus->rtol = 1.e-6;
712: pch2opus->maxits = 50;
713: pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
714: pch2opus->normtype = NORM_2;
716: /* these are needed when we are sampling the pmat */
717: pch2opus->eta = PETSC_DECIDE;
718: pch2opus->leafsize = PETSC_DECIDE;
719: pch2opus->max_rank = PETSC_DECIDE;
720: pch2opus->bs = PETSC_DECIDE;
721: pch2opus->mrtol = PETSC_DECIDE;
722: pch2opus->boundtocpu = PetscDefined(H2OPUS_USE_GPU) ? PETSC_FALSE : PETSC_TRUE;
723: pc->ops->destroy = PCDestroy_H2OPUS;
724: pc->ops->setup = PCSetUp_H2OPUS;
725: pc->ops->apply = PCApply_H2OPUS;
726: pc->ops->matapply = PCApplyMat_H2OPUS;
727: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
728: pc->ops->reset = PCReset_H2OPUS;
729: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
730: pc->ops->view = PCView_H2OPUS;
732: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
733: PetscFunctionReturn(PETSC_SUCCESS);
734: }