Actual source code: pch2opus.c

  1: #include <petsc/private/pcimpl.h>
  2: #include <petsc/private/matimpl.h>
  3: #include <petscdm.h>
  4: #include <h2opusconf.h>

  6: /* Use GPU only if H2OPUS is configured for GPU */
  7: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
  8:   #define PETSC_H2OPUS_USE_GPU
  9: #endif

 11: typedef struct {
 12:   Mat         A;
 13:   Mat         M;
 14:   PetscScalar s0;

 16:   /* sampler for Newton-Schultz */
 17:   Mat      S;
 18:   PetscInt hyperorder;
 19:   Vec      wns[4];
 20:   Mat      wnsmat[4];

 22:   /* convergence testing */
 23:   Mat T;
 24:   Vec w;

 26:   /* Support for PCSetCoordinates */
 27:   PetscInt   sdim;
 28:   PetscInt   nlocc;
 29:   PetscReal *coords;

 31:   /* Newton-Schultz customization */
 32:   PetscInt  maxits;
 33:   PetscReal rtol, atol;
 34:   PetscBool monitor;
 35:   PetscBool useapproximatenorms;
 36:   NormType  normtype;

 38:   /* Used when pmat != MATH2OPUS */
 39:   PetscReal eta;
 40:   PetscInt  leafsize;
 41:   PetscInt  max_rank;
 42:   PetscInt  bs;
 43:   PetscReal mrtol;

 45:   /* CPU/GPU */
 46:   PetscBool forcecpu;
 47:   PetscBool boundtocpu;
 48: } PC_H2OPUS;

 50: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);

 52: static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc)
 53: {
 54:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
 55:   DM         dm;
 56:   PetscBool  isdmda;

 58:   PetscFunctionBegin;
 59:   if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS);
 60:   PetscCall(PCGetDM(pc, &dm));
 61:   if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm));
 62:   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda));
 63:   if (isdmda) {
 64:     Vec                c;
 65:     const PetscScalar *coords;
 66:     PetscInt           n, sdim;

 68:     PetscCall(DMGetCoordinates(dm, &c));
 69:     PetscCall(DMGetDimension(dm, &sdim));
 70:     PetscCall(VecGetLocalSize(c, &n));
 71:     PetscCall(VecGetArrayRead(c, &coords));
 72:     PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords));
 73:     PetscCall(VecRestoreArrayRead(c, &coords));
 74:   }
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: static PetscErrorCode PCReset_H2OPUS(PC pc)
 79: {
 80:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

 82:   PetscFunctionBegin;
 83:   pch2opus->sdim  = 0;
 84:   pch2opus->nlocc = 0;
 85:   PetscCall(PetscFree(pch2opus->coords));
 86:   PetscFunctionReturn(PETSC_SUCCESS);
 87: }

 89: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
 90: {
 91:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
 92:   PetscBool  reset    = PETSC_TRUE;

 94:   PetscFunctionBegin;
 95:   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
 96:     PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
 97:     reset = (PetscBool)!reset;
 98:   }
 99:   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
100:   if (reset) {
101:     PetscCall(PCReset_H2OPUS(pc));
102:     PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
103:     PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
104:     pch2opus->sdim  = sdim;
105:     pch2opus->nlocc = nlocc;
106:   }
107:   PetscFunctionReturn(PETSC_SUCCESS);
108: }

110: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
111: {
112:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

114:   PetscFunctionBegin;
115:   PetscCall(PCReset_H2OPUS(pc));
116:   PetscCall(MatDestroy(&pch2opus->A));
117:   PetscCall(MatDestroy(&pch2opus->M));
118:   PetscCall(MatDestroy(&pch2opus->T));
119:   PetscCall(VecDestroy(&pch2opus->w));
120:   PetscCall(MatDestroy(&pch2opus->S));
121:   PetscCall(VecDestroy(&pch2opus->wns[0]));
122:   PetscCall(VecDestroy(&pch2opus->wns[1]));
123:   PetscCall(VecDestroy(&pch2opus->wns[2]));
124:   PetscCall(VecDestroy(&pch2opus->wns[3]));
125:   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
126:   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
127:   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
128:   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
129:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
130:   PetscCall(PetscFree(pc->data));
131:   PetscFunctionReturn(PETSC_SUCCESS);
132: }

134: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems PetscOptionsObject)
135: {
136:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

138:   PetscFunctionBegin;
139:   PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
140:   PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
141:   PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
142:   PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
143:   PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
144:   PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
145:   PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
146:   PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
147:   PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
148:   PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
149:   PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
150:   PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
151:   PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
152:   PetscOptionsHeadEnd();
153:   PetscFunctionReturn(PETSC_SUCCESS);
154: }

156: typedef struct {
157:   Mat A;
158:   Mat M;
159:   Vec w;
160: } AAtCtx;

162: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
163: {
164:   AAtCtx *aat;

166:   PetscFunctionBegin;
167:   PetscCall(MatShellGetContext(A, &aat));
168:   /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
169:   PetscCall(MatMultTranspose(aat->A, x, aat->w));
170:   PetscCall(MatMult(aat->A, aat->w, y));
171:   PetscFunctionReturn(PETSC_SUCCESS);
172: }

174: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
175: {
176:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
178:   PetscInt   M, m;
179:   VecType    vtype;
180:   PetscReal  n;
181:   AAtCtx     aat;

183:   PetscFunctionBegin;
184:   aat.A = A;
185:   aat.M = pch2opus->M; /* unused so far */
186:   PetscCall(MatCreateVecs(A, NULL, &aat.w));
187:   PetscCall(MatGetSize(A, &M, NULL));
188:   PetscCall(MatGetLocalSize(A, &m, NULL));
189:   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
190:   PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
191:   PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (PetscErrorCodeFn *)MatMult_AAt));
192:   PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_AAt));
193:   PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
194:   PetscCall(MatGetVecType(A, &vtype));
195:   PetscCall(MatShellSetVecType(AAt, vtype));
196:   PetscCall(MatNorm(AAt, NORM_1, &n));
197:   pch2opus->s0 = 1. / n;
198:   PetscCall(VecDestroy(&aat.w));
199:   PetscCall(MatDestroy(&AAt));
200:   PetscFunctionReturn(PETSC_SUCCESS);
201: }

203: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
204: {
205:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

207:   PetscFunctionBegin;
208:   if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
209:   else PetscCall(MatMult(pch2opus->M, x, y));
210:   PetscFunctionReturn(PETSC_SUCCESS);
211: }

213: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
214: {
215:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

217:   PetscFunctionBegin;
218:   if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
219:   else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
224: {
225:   PetscFunctionBegin;
226:   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
227:   PetscFunctionReturn(PETSC_SUCCESS);
228: }

230: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
231: {
232:   PetscFunctionBegin;
233:   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }

237: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
238: {
239:   PetscFunctionBegin;
240:   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }

244: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
245: {
246:   PetscFunctionBegin;
247:   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: /* used to test the norm of (M^-1 A - I) */
252: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
253: {
254:   PC         pc;
255:   Mat        A;
256:   PC_H2OPUS *pch2opus;
257:   PetscBool  sideleft = PETSC_TRUE;

259:   PetscFunctionBegin;
260:   PetscCall(MatShellGetContext(M, &pc));
261:   pch2opus = (PC_H2OPUS *)pc->data;
262:   if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
263:   A = pch2opus->A;
264:   PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
265:   if (t) {
266:     if (sideleft) {
267:       PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
268:       PetscCall(MatMultTranspose(A, pch2opus->w, y));
269:     } else {
270:       PetscCall(MatMultTranspose(A, x, pch2opus->w));
271:       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
272:     }
273:   } else {
274:     if (sideleft) {
275:       PetscCall(MatMult(A, x, pch2opus->w));
276:       PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
277:     } else {
278:       PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
279:       PetscCall(MatMult(A, pch2opus->w, y));
280:     }
281:   }
282:   PetscCall(VecAXPY(y, -1.0, x));
283:   PetscFunctionReturn(PETSC_SUCCESS);
284: }

286: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
287: {
288:   PetscFunctionBegin;
289:   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
294: {
295:   PetscFunctionBegin;
296:   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }

300: /* HyperPower kernel:
301: Y = R = x
302: for i = 1 . . . l - 1 do
303:   R = (I - A * Xk) * R
304:   Y = Y + R
305: Y = Xk * Y
306: */
307: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
308: {
309:   PC         pc;
310:   Mat        A;
311:   PC_H2OPUS *pch2opus;

313:   PetscFunctionBegin;
314:   PetscCall(MatShellGetContext(M, &pc));
315:   pch2opus = (PC_H2OPUS *)pc->data;
316:   A        = pch2opus->A;
317:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
318:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
319:   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
320:   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
321:   PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
322:   PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
323:   PetscCall(VecCopy(x, pch2opus->wns[0]));
324:   PetscCall(VecCopy(x, pch2opus->wns[3]));
325:   if (t) {
326:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
327:       PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
328:       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
329:       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
330:       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
331:     }
332:     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
333:   } else {
334:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
335:       PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
336:       PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
337:       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
338:       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
339:     }
340:     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
341:   }
342:   PetscFunctionReturn(PETSC_SUCCESS);
343: }

345: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
346: {
347:   PetscFunctionBegin;
348:   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
349:   PetscFunctionReturn(PETSC_SUCCESS);
350: }

352: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
353: {
354:   PetscFunctionBegin;
355:   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: /* Hyper power kernel, MatMat version */
360: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
361: {
362:   PC         pc;
363:   Mat        A;
364:   PC_H2OPUS *pch2opus;

366:   PetscFunctionBegin;
367:   PetscCall(MatShellGetContext(M, &pc));
368:   pch2opus = (PC_H2OPUS *)pc->data;
369:   A        = pch2opus->A;
370:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
371:     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
372:     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
373:   }
374:   if (!pch2opus->wnsmat[0]) {
375:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
376:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
377:   }
378:   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
379:     PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
380:     PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
381:   }
382:   if (!pch2opus->wnsmat[2]) {
383:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
384:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
385:   }
386:   PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
387:   PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
388:   if (t) {
389:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
390:       PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
391:       PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
392:       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
393:       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
394:     }
395:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
396:   } else {
397:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
398:       PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
399:       PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2]));
400:       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
401:       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
402:     }
403:     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
404:   }
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }

408: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, PetscCtx ctx)
409: {
410:   PetscFunctionBegin;
411:   PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
412:   PetscFunctionReturn(PETSC_SUCCESS);
413: }

415: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
416: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
417: {
418:   PC         pc;
419:   Mat        A;
420:   PC_H2OPUS *pch2opus;

422:   PetscFunctionBegin;
423:   PetscCall(MatShellGetContext(M, &pc));
424:   pch2opus = (PC_H2OPUS *)pc->data;
425:   A        = pch2opus->A;
426:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
427:   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
428:   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
429:   if (t) {
430:     PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
431:     PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
432:     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
433:     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
434:   } else {
435:     PetscCall(PCApply_H2OPUS(pc, x, y));
436:     PetscCall(MatMult(A, y, pch2opus->wns[0]));
437:     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
438:     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
439:   }
440:   PetscFunctionReturn(PETSC_SUCCESS);
441: }

443: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
444: {
445:   PetscFunctionBegin;
446:   PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
447:   PetscFunctionReturn(PETSC_SUCCESS);
448: }

450: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
451: {
452:   PetscFunctionBegin;
453:   PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
454:   PetscFunctionReturn(PETSC_SUCCESS);
455: }

457: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
458: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
459: {
460:   PC         pc;
461:   Mat        A;
462:   PC_H2OPUS *pch2opus;

464:   PetscFunctionBegin;
465:   PetscCall(MatShellGetContext(M, &pc));
466:   pch2opus = (PC_H2OPUS *)pc->data;
467:   A        = pch2opus->A;
468:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
469:     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
470:     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
471:   }
472:   if (!pch2opus->wnsmat[0]) {
473:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
474:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
475:   }
476:   if (t) {
477:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
478:     PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
479:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
480:     PetscCall(MatScale(Y, 2.));
481:     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
482:   } else {
483:     PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
484:     PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0]));
485:     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
486:     PetscCall(MatScale(Y, 2.));
487:     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
488:   }
489:   PetscFunctionReturn(PETSC_SUCCESS);
490: }

492: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, PetscCtx ctx)
493: {
494:   PetscFunctionBegin;
495:   PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
496:   PetscFunctionReturn(PETSC_SUCCESS);
497: }

499: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
500: {
501:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
502:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;

504:   PetscFunctionBegin;
505:   if (!pch2opus->S) {
506:     PetscInt M, N, m, n;

508:     PetscCall(MatGetSize(A, &M, &N));
509:     PetscCall(MatGetLocalSize(A, &m, &n));
510:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
511:     PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
512: #if defined(PETSC_H2OPUS_USE_GPU)
513:     PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
514: #endif
515:   }
516:   if (pch2opus->hyperorder >= 2) {
517:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Hyper));
518:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Hyper));
519:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
520:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
521:   } else {
522:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_NS));
523:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_NS));
524:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
525:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
526:   }
527:   PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
528:   PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
529:   /* XXX */
530:   PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
531:   PetscFunctionReturn(PETSC_SUCCESS);
532: }

534: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
535: {
536:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
537:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
538:   NormType   norm     = pch2opus->normtype;
539:   PetscReal  initerr  = 0.0, err;
540:   PetscBool  ish2opus;

542:   PetscFunctionBegin;
543:   if (!pch2opus->T) {
544:     PetscInt    M, N, m, n;
545:     const char *prefix;

547:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
548:     PetscCall(MatGetSize(A, &M, &N));
549:     PetscCall(MatGetLocalSize(A, &m, &n));
550:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
551:     PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
552:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (PetscErrorCodeFn *)MatMult_MAmI));
553:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_MAmI));
554:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
555: #if defined(PETSC_H2OPUS_USE_GPU)
556:     PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
557: #endif
558:     PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
559:     PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
560:   }
561:   PetscCall(MatDestroy(&pch2opus->A));
562:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
563:   if (ish2opus) {
564:     PetscCall(PetscObjectReference((PetscObject)A));
565:     pch2opus->A = A;
566:   } else {
567:     const char *prefix;

569:     /* See if we can infer coordinates from the DM */
570:     if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc));
571:     PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
572:     /* XXX */
573:     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
574:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
575:     PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
576:     PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
577:     PetscCall(MatSetFromOptions(pch2opus->A));
578:     PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
579:     PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
580:     /* XXX */
581:     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));

583:     /* always perform construction on the GPU unless forcecpu is true */
584:     PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
585:   }
586: #if defined(PETSC_H2OPUS_USE_GPU)
587:   pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
588: #endif
589:   PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
590:   if (pch2opus->M) { /* see if we can reuse M as initial guess */
591:     PetscReal reuse;

593:     PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
594:     PetscCall(MatNorm(pch2opus->T, norm, &reuse));
595:     if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
596:   }
597:   if (!pch2opus->M) {
598:     const char *prefix;
599:     PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
600:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
601:     PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
602:     PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
603:     PetscCall(MatSetFromOptions(pch2opus->M));
604:     PetscCall(PCH2OpusSetUpInit(pc));
605:     PetscCall(MatScale(pch2opus->M, pch2opus->s0));
606:   }
607:   /* A and M have the same h2 matrix nonzero structure, save on reordering routines */
608:   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
609:   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
610:   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
611:   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
612:   err = initerr;
613:   if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
614:   if (initerr > pch2opus->atol && !pc->failedreason) {
615:     PetscCall(PCH2OpusSetUpSampler_Private(pc));
616:     for (PetscInt i = 0; i < pch2opus->maxits; i++) {
617:       Mat         M;
618:       const char *prefix;

620:       PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
621:       PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
622:       PetscCall(MatSetOptionsPrefix(M, prefix));
623:       PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
624:       PetscCall(MatSetFromOptions(M));
625:       PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
626:       PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
627:       PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
628:       PetscCall(MatDestroy(&pch2opus->M));
629:       pch2opus->M = M;
630:       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
631:       if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
632:       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
633:       if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
634:     }
635:   }
636:   /* cleanup setup workspace */
637:   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
638:   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
639:   PetscCall(VecDestroy(&pch2opus->wns[0]));
640:   PetscCall(VecDestroy(&pch2opus->wns[1]));
641:   PetscCall(VecDestroy(&pch2opus->wns[2]));
642:   PetscCall(VecDestroy(&pch2opus->wns[3]));
643:   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
644:   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
645:   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
646:   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }

650: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
651: {
652:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
653:   PetscBool  isascii;

655:   PetscFunctionBegin;
656:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
657:   if (isascii) {
658:     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
659:       PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
660:       PetscCall(PetscViewerASCIIPushTab(viewer));
661:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
662:       PetscCall(MatView(pch2opus->A, viewer));
663:       PetscCall(PetscViewerPopFormat(viewer));
664:       PetscCall(PetscViewerASCIIPopTab(viewer));
665:     }
666:     if (pch2opus->M) {
667:       PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
668:       PetscCall(PetscViewerASCIIPushTab(viewer));
669:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
670:       PetscCall(MatView(pch2opus->M, viewer));
671:       PetscCall(PetscViewerPopFormat(viewer));
672:       PetscCall(PetscViewerASCIIPopTab(viewer));
673:     }
674:     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
675:   }
676:   PetscFunctionReturn(PETSC_SUCCESS);
677: }

679: /*MC
680:      PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.

682:    Options Database Keys:
683: +   -pc_type h2opus       - pc type to "h2opus" during a call to `PCSetFromOptions()`
684: .   -pc_h2opus_maxits     - maximum number of iterations for Newton-Schultz
685: .   -pc_h2opus_monitor    - monitor Newton-Schultz convergence
686: .   -pc_h2opus_atol       - absolute tolerance
687: .   -pc_h2opus_rtol       - relative tolerance
688: .   -pc_h2opus_norm_type  - normtype
689: .   -pc_h2opus_hyperorder - Hyper power order of sampling
690: .   -pc_h2opus_leafsize   - leaf size when constructed from kernel
691: .   -pc_h2opus_eta        - admissibility condition tolerance
692: .   -pc_h2opus_maxrank    - maximum rank when constructed from matvecs
693: .   -pc_h2opus_samples    - number of samples to be taken concurrently when constructing from matvecs
694: .   -pc_h2opus_mrtol      - relative tolerance for construction from sampling
695: -   -pc_h2opus_forcecpu   - force construction of preconditioner on CPU

697:    Level: intermediate

699: .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
700: M*/

702: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
703: {
704:   PC_H2OPUS *pch2opus;

706:   PetscFunctionBegin;
707:   PetscCall(PetscNew(&pch2opus));
708:   pc->data = (void *)pch2opus;

710:   pch2opus->atol       = 1.e-2;
711:   pch2opus->rtol       = 1.e-6;
712:   pch2opus->maxits     = 50;
713:   pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
714:   pch2opus->normtype   = NORM_2;

716:   /* these are needed when we are sampling the pmat */
717:   pch2opus->eta           = PETSC_DECIDE;
718:   pch2opus->leafsize      = PETSC_DECIDE;
719:   pch2opus->max_rank      = PETSC_DECIDE;
720:   pch2opus->bs            = PETSC_DECIDE;
721:   pch2opus->mrtol         = PETSC_DECIDE;
722:   pch2opus->boundtocpu    = PetscDefined(H2OPUS_USE_GPU) ? PETSC_FALSE : PETSC_TRUE;
723:   pc->ops->destroy        = PCDestroy_H2OPUS;
724:   pc->ops->setup          = PCSetUp_H2OPUS;
725:   pc->ops->apply          = PCApply_H2OPUS;
726:   pc->ops->matapply       = PCApplyMat_H2OPUS;
727:   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
728:   pc->ops->reset          = PCReset_H2OPUS;
729:   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
730:   pc->ops->view           = PCView_H2OPUS;

732:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
733:   PetscFunctionReturn(PETSC_SUCCESS);
734: }