Actual source code: pipeprcg.c

  1: #include <petsc/private/kspimpl.h>

  3: typedef struct KSP_CG_PIPE_PR_s KSP_CG_PIPE_PR;
  4: struct KSP_CG_PIPE_PR_s {
  5:   PetscBool rc_w_q; /* flag to determine whether w_k should be recomputed with A r_k */
  6: };

  8: /*
  9:      KSPSetUp_PIPEPRCG - Sets up the workspace needed by the PIPEPRCG method.

 11:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
 12:      but can be called directly by KSPSetUp()
 13: */
 14: static PetscErrorCode KSPSetUp_PIPEPRCG(KSP ksp)
 15: {
 16:   /* get work vectors needed by PIPEPRCG */
 17:   KSPSetWorkVecs(ksp, 9);

 19:   return 0;
 20: }

 22: static PetscErrorCode KSPSetFromOptions_PIPEPRCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
 23: {
 24:   KSP_CG_PIPE_PR *prcg = (KSP_CG_PIPE_PR *)ksp->data;
 25:   PetscBool       flag = PETSC_FALSE;

 27:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEPRCG options");
 28:   PetscOptionsBool("-recompute_w", "-recompute w_k with Ar_k? (default = True)", "", prcg->rc_w_q, &prcg->rc_w_q, &flag);
 29:   if (!flag) prcg->rc_w_q = PETSC_TRUE;
 30:   PetscOptionsHeadEnd();
 31:   return 0;
 32: }

 34: /*
 35:  KSPSolve_PIPEPRCG - This routine actually applies the pipelined predict and recompute conjugate gradient method
 36: */
 37: static PetscErrorCode KSPSolve_PIPEPRCG(KSP ksp)
 38: {
 39:   PetscInt        i;
 40:   KSP_CG_PIPE_PR *prcg  = (KSP_CG_PIPE_PR *)ksp->data;
 41:   PetscScalar     alpha = 0.0, beta = 0.0, nu = 0.0, nu_old = 0.0, mudelgam[3], *mu_p, *delta_p, *gamma_p;
 42:   PetscReal       dp = 0.0;
 43:   Vec             X, B, R, RT, W, WT, P, S, ST, U, UT, PRTST[3];
 44:   Mat             Amat, Pmat;
 45:   PetscBool       diagonalscale, rc_w_q = prcg->rc_w_q;

 47:   /* note that these are pointers to entries of muldelgam, different than nu */
 48:   mu_p    = &mudelgam[0];
 49:   delta_p = &mudelgam[1];
 50:   gamma_p = &mudelgam[2];


 53:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

 56:   X  = ksp->vec_sol;
 57:   B  = ksp->vec_rhs;
 58:   R  = ksp->work[0];
 59:   RT = ksp->work[1];
 60:   W  = ksp->work[2];
 61:   WT = ksp->work[3];
 62:   P  = ksp->work[4];
 63:   S  = ksp->work[5];
 64:   ST = ksp->work[6];
 65:   U  = ksp->work[7];
 66:   UT = ksp->work[8];

 68:   PCGetOperators(ksp->pc, &Amat, &Pmat);

 70:   /* initialize */
 71:   ksp->its = 0;
 72:   if (!ksp->guess_zero) {
 73:     KSP_MatMult(ksp, Amat, X, R); /*   r <- b - Ax  */
 74:     VecAYPX(R, -1.0, B);
 75:   } else {
 76:     VecCopy(B, R); /*   r <- b       */
 77:   }

 79:   KSP_PCApply(ksp, R, RT);       /*   rt <- Br     */
 80:   KSP_MatMult(ksp, Amat, RT, W); /*   w <- A rt    */
 81:   KSP_PCApply(ksp, W, WT);       /*   wt <- B w    */

 83:   VecCopy(RT, P);  /*   p <- rt      */
 84:   VecCopy(W, S);   /*   p <- rt      */
 85:   VecCopy(WT, ST); /*   p <- rt      */

 87:   KSP_MatMult(ksp, Amat, ST, U); /*   u <- Ast     */
 88:   KSP_PCApply(ksp, U, UT);       /*   ut <- Bu     */

 90:   VecDotBegin(RT, R, &nu);
 91:   VecDotBegin(P, S, mu_p);
 92:   VecDotBegin(ST, S, gamma_p);

 94:   VecDotEnd(RT, R, &nu);     /*   nu    <- (rt,r)  */
 95:   VecDotEnd(P, S, mu_p);     /*   mu    <- (p,s)   */
 96:   VecDotEnd(ST, S, gamma_p); /*   gamma <- (st,s)  */
 97:   *delta_p = *mu_p;

 99:   i = 0;
100:   do {
101:     /* Compute appropriate norm */
102:     switch (ksp->normtype) {
103:     case KSP_NORM_PRECONDITIONED:
104:       VecNormBegin(RT, NORM_2, &dp);
105:       PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)RT));
106:       VecNormEnd(RT, NORM_2, &dp);
107:       break;
108:     case KSP_NORM_UNPRECONDITIONED:
109:       VecNormBegin(R, NORM_2, &dp);
110:       PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
111:       VecNormEnd(R, NORM_2, &dp);
112:       break;
113:     case KSP_NORM_NATURAL:
114:       dp = PetscSqrtReal(PetscAbsScalar(nu));
115:       break;
116:     case KSP_NORM_NONE:
117:       dp = 0.0;
118:       break;
119:     default:
120:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
121:     }

123:     ksp->rnorm = dp;
124:     KSPLogResidualHistory(ksp, dp);
125:     KSPMonitor(ksp, i, dp);
126:     (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
127:     if (ksp->reason) return 0;

129:     /* update scalars */
130:     alpha  = nu / *mu_p;
131:     nu_old = nu;
132:     nu     = nu_old - 2. * alpha * (*delta_p) + (alpha * alpha) * (*gamma_p);
133:     beta   = nu / nu_old;

135:     /* update vectors */
136:     VecAXPY(X, alpha, P);    /*   x  <- x  + alpha * p   */
137:     VecAXPY(R, -alpha, S);   /*   r  <- r  - alpha * s   */
138:     VecAXPY(RT, -alpha, ST); /*   rt <- rt - alpha * st  */
139:     VecAXPY(W, -alpha, U);   /*   w  <- w  - alpha * u   */
140:     VecAXPY(WT, -alpha, UT); /*   wt <- wt - alpha * ut  */
141:     VecAYPX(P, beta, RT);    /*   p  <- rt + beta  * p   */
142:     VecAYPX(S, beta, W);     /*   s  <- w  + beta  * s   */
143:     VecAYPX(ST, beta, WT);   /*   st <- wt + beta  * st  */

145:     VecDotBegin(RT, R, &nu);

147:     PRTST[0] = P;
148:     PRTST[1] = RT;
149:     PRTST[2] = ST;

151:     VecMDotBegin(S, 3, PRTST, mudelgam);

153:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));

155:     KSP_MatMult(ksp, Amat, ST, U); /*   u  <- A st             */
156:     KSP_PCApply(ksp, U, UT);       /*   ut <- B u              */

158:     /* predict-and-recompute */
159:     /* ideally this is combined with the previous matvec; i.e. equivalent of MDot */
160:     if (rc_w_q) {
161:       KSP_MatMult(ksp, Amat, RT, W); /*   w  <- A rt             */
162:       KSP_PCApply(ksp, W, WT);       /*   wt <- B w              */
163:     }

165:     VecDotEnd(RT, R, &nu);
166:     VecMDotEnd(S, 3, PRTST, mudelgam);

168:     i++;
169:     ksp->its = i;

171:   } while (i <= ksp->max_it);
172:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
173:   return 0;
174: }

176: /*MC
177:    KSPPIPEPRCG - Pipelined predict-and-recompute conjugate gradient method. [](sec_pipelineksp)

179:    Options Database Key:
180: .  -ksp_pipeprcg_recompute_w - recompute the w_k with Ar_k, default is true

182:    Level: intermediate

184:    Notes:
185:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.
186:    The non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

188:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
189:    See [](doc_faq_pipelined)

191:    Contributed by:
192:    Tyler Chen, University of Washington, Applied Mathematics Department

194:    Reference:
195:    Tyler Chen and Erin Carson. "Predict-and-recompute conjugate gradient variants." SIAM Journal on Scientific Computing 42.5 (2020): A3084-A3108.

197:    Acknowledgments:
198:    This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1762114.
199:    Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author and do not necessarily
200:    reflect the views of the National Science Foundation.

202: .seealso: [](chapter_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
203: M*/
204: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEPRCG(KSP ksp)
205: {
206:   KSP_CG_PIPE_PR *prcg = NULL;
207:   PetscBool       cite = PETSC_FALSE;


210:   PetscCall(PetscCitationsRegister("@article{predict_and_recompute_cg,\n  author = {Tyler Chen and Erin C. Carson},\n  title = {Predict-and-recompute conjugate gradient variants},\n  journal = {},\n  year = {2020},\n  eprint = {1905.01549},\n  "
211:                                    "archivePrefix = {arXiv},\n  primaryClass = {cs.NA}\n}",
212:                                    &cite));

214:   PetscNew(&prcg);
215:   ksp->data = (void *)prcg;

217:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2);
218:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
219:   KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
220:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

222:   ksp->ops->setup          = KSPSetUp_PIPEPRCG;
223:   ksp->ops->solve          = KSPSolve_PIPEPRCG;
224:   ksp->ops->destroy        = KSPDestroyDefault;
225:   ksp->ops->view           = NULL;
226:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEPRCG;
227:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
228:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
229:   return 0;
230: }