Actual source code: pipeprcg.c
  1: #include <petsc/private/kspimpl.h>

  3: typedef struct KSP_CG_PIPE_PR_s KSP_CG_PIPE_PR;
  4: struct KSP_CG_PIPE_PR_s {
  5:   PetscBool rc_w_q; /* flag to determine whether w_k should be recomputed with A r_k */
  6: };

  8: /*
  9:      KSPSetUp_PIPEPRCG - Sets up the workspace needed by the PIPEPRCG method.

 11:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
 12:      but can be called directly by KSPSetUp()
 13: */
 14: static PetscErrorCode KSPSetUp_PIPEPRCG(KSP ksp)
 15: {
 16:   PetscFunctionBegin;
 17:   /* get work vectors needed by PIPEPRCG */
 18:   PetscCall(KSPSetWorkVecs(ksp, 9));
 19:   PetscFunctionReturn(PETSC_SUCCESS);
 20: }

 22: static PetscErrorCode KSPSetFromOptions_PIPEPRCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
 23: {
 24:   KSP_CG_PIPE_PR *prcg = (KSP_CG_PIPE_PR *)ksp->data;
 25:   PetscBool       flag = PETSC_FALSE;

 27:   PetscFunctionBegin;
 28:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEPRCG options");
 29:   PetscCall(PetscOptionsBool("-recompute_w", "-recompute w_k with Ar_k? (default = True)", "", prcg->rc_w_q, &prcg->rc_w_q, &flag));
 30:   if (!flag) prcg->rc_w_q = PETSC_TRUE;
 31:   PetscOptionsHeadEnd();
 32:   PetscFunctionReturn(PETSC_SUCCESS);
 33: }

 35: /*
 36:  KSPSolve_PIPEPRCG - This routine actually applies the pipelined predict and recompute conjugate gradient method
 37: */
 38: static PetscErrorCode KSPSolve_PIPEPRCG(KSP ksp)
 39: {
 40:   PetscInt        i;
 41:   KSP_CG_PIPE_PR *prcg  = (KSP_CG_PIPE_PR *)ksp->data;
 42:   PetscScalar     alpha = 0.0, beta = 0.0, nu = 0.0, nu_old = 0.0, mudelgam[3], *mu_p, *delta_p, *gamma_p;
 43:   PetscReal       dp = 0.0;
 44:   Vec             X, B, R, RT, W, WT, P, S, ST, U, UT, PRTST[3];
 45:   Mat             Amat, Pmat;
 46:   PetscBool       diagonalscale, rc_w_q = prcg->rc_w_q;

 48:   /* note that these are pointers to entries of muldelgam, different than nu */
 49:   mu_p    = &mudelgam[0];
 50:   delta_p = &mudelgam[1];
 51:   gamma_p = &mudelgam[2];

 53:   PetscFunctionBegin;
 54:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 55:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

 57:   X  = ksp->vec_sol;
 58:   B  = ksp->vec_rhs;
 59:   R  = ksp->work[0];
 60:   RT = ksp->work[1];
 61:   W  = ksp->work[2];
 62:   WT = ksp->work[3];
 63:   P  = ksp->work[4];
 64:   S  = ksp->work[5];
 65:   ST = ksp->work[6];
 66:   U  = ksp->work[7];
 67:   UT = ksp->work[8];

 69:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

 71:   /* initialize */
 72:   ksp->its = 0;
 73:   if (!ksp->guess_zero) {
 74:     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*   r <- b - Ax  */
 75:     PetscCall(VecAYPX(R, -1.0, B));
 76:   } else {
 77:     PetscCall(VecCopy(B, R)); /*   r <- b       */
 78:   }

 80:   PetscCall(KSP_PCApply(ksp, R, RT));       /*   rt <- Br     */
 81:   PetscCall(KSP_MatMult(ksp, Amat, RT, W)); /*   w <- A rt    */
 82:   PetscCall(KSP_PCApply(ksp, W, WT));       /*   wt <- B w    */

 84:   PetscCall(VecCopy(RT, P));  /*   p <- rt      */
 85:   PetscCall(VecCopy(W, S));   /*   p <- rt      */
 86:   PetscCall(VecCopy(WT, ST)); /*   p <- rt      */

 88:   PetscCall(KSP_MatMult(ksp, Amat, ST, U)); /*   u <- Ast     */
 89:   PetscCall(KSP_PCApply(ksp, U, UT));       /*   ut <- Bu     */

 91:   PetscCall(VecDotBegin(RT, R, &nu));
 92:   PetscCall(VecDotBegin(P, S, mu_p));
 93:   PetscCall(VecDotBegin(ST, S, gamma_p));

 95:   PetscCall(VecDotEnd(RT, R, &nu));     /*   nu    <- (rt,r)  */
 96:   PetscCall(VecDotEnd(P, S, mu_p));     /*   mu    <- (p,s)   */
 97:   PetscCall(VecDotEnd(ST, S, gamma_p)); /*   gamma <- (st,s)  */
 98:   *delta_p = *mu_p;

100:   i = 0;
101:   do {
102:     /* Compute appropriate norm */
103:     switch (ksp->normtype) {
104:     case KSP_NORM_PRECONDITIONED:
105:       PetscCall(VecNormBegin(RT, NORM_2, &dp));
106:       PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)RT)));
107:       PetscCall(VecNormEnd(RT, NORM_2, &dp));
108:       break;
109:     case KSP_NORM_UNPRECONDITIONED:
110:       PetscCall(VecNormBegin(R, NORM_2, &dp));
111:       PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
112:       PetscCall(VecNormEnd(R, NORM_2, &dp));
113:       break;
114:     case KSP_NORM_NATURAL:
115:       dp = PetscSqrtReal(PetscAbsScalar(nu));
116:       break;
117:     case KSP_NORM_NONE:
118:       dp = 0.0;
119:       break;
120:     default:
121:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
122:     }

124:     ksp->rnorm = dp;
125:     PetscCall(KSPLogResidualHistory(ksp, dp));
126:     PetscCall(KSPMonitor(ksp, i, dp));
127:     PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
128:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

130:     /* update scalars */
131:     alpha  = nu / *mu_p;
132:     nu_old = nu;
133:     nu     = nu_old - 2. * alpha * (*delta_p) + (alpha * alpha) * (*gamma_p);
134:     beta   = nu / nu_old;

136:     /* update vectors */
137:     PetscCall(VecAXPY(X, alpha, P));    /*   x  <- x  + alpha * p   */
138:     PetscCall(VecAXPY(R, -alpha, S));   /*   r  <- r  - alpha * s   */
139:     PetscCall(VecAXPY(RT, -alpha, ST)); /*   rt <- rt - alpha * st  */
140:     PetscCall(VecAXPY(W, -alpha, U));   /*   w  <- w  - alpha * u   */
141:     PetscCall(VecAXPY(WT, -alpha, UT)); /*   wt <- wt - alpha * ut  */
142:     PetscCall(VecAYPX(P, beta, RT));    /*   p  <- rt + beta  * p   */
143:     PetscCall(VecAYPX(S, beta, W));     /*   s  <- w  + beta  * s   */
144:     PetscCall(VecAYPX(ST, beta, WT));   /*   st <- wt + beta  * st  */

146:     PetscCall(VecDotBegin(RT, R, &nu));

148:     PRTST[0] = P;
149:     PRTST[1] = RT;
150:     PRTST[2] = ST;

152:     PetscCall(VecMDotBegin(S, 3, PRTST, mudelgam));

154:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));

156:     PetscCall(KSP_MatMult(ksp, Amat, ST, U)); /*   u  <- A st             */
157:     PetscCall(KSP_PCApply(ksp, U, UT));       /*   ut <- B u              */

159:     /* predict-and-recompute */
160:     /* ideally this is combined with the previous matvec; i.e. equivalent of MDot */
161:     if (rc_w_q) {
162:       PetscCall(KSP_MatMult(ksp, Amat, RT, W)); /*   w  <- A rt             */
163:       PetscCall(KSP_PCApply(ksp, W, WT));       /*   wt <- B w              */
164:     }

166:     PetscCall(VecDotEnd(RT, R, &nu));
167:     PetscCall(VecMDotEnd(S, 3, PRTST, mudelgam));

169:     i++;
170:     ksp->its = i;

172:   } while (i <= ksp->max_it);
173:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
174:   PetscFunctionReturn(PETSC_SUCCESS);
175: }

177: /*MC
178:    KSPPIPEPRCG - Pipelined predict-and-recompute conjugate gradient method {cite}`chen2020predict`. [](sec_pipelineksp)

180:    Options Database Key:
181: .  -ksp_pipeprcg_recompute_w - recompute the $w_k$ with $Ar_k$, default is true

183:    Level: intermediate

185:    Notes:
186:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.
187:    The non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

189:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
190:    See [](doc_faq_pipelined)

192:    Contributed by:
193:    Tyler Chen, University of Washington, Applied Mathematics Department

195:    Acknowledgments:
196:    This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1762114.
197:    Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author and do not necessarily
198:    reflect the views of the National Science Foundation.

200: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
201: M*/
202: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEPRCG(KSP ksp)
203: {
204:   KSP_CG_PIPE_PR *prcg = NULL;
205:   PetscBool       cite = PETSC_FALSE;

207:   PetscFunctionBegin;
208:   PetscCall(PetscCitationsRegister("@article{predict_and_recompute_cg,\n  author = {Tyler Chen and Erin C. Carson},\n  title = {Predict-and-recompute conjugate gradient variants},\n  journal = {},\n  year = {2020},\n  eprint = {1905.01549},\n  "
209:                                    "archivePrefix = {arXiv},\n  primaryClass = {cs.NA}\n}",
210:                                    &cite));

212:   PetscCall(PetscNew(&prcg));
213:   ksp->data = (void *)prcg;

215:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
216:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
217:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
218:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

220:   ksp->ops->setup          = KSPSetUp_PIPEPRCG;
221:   ksp->ops->solve          = KSPSolve_PIPEPRCG;
222:   ksp->ops->destroy        = KSPDestroyDefault;
223:   ksp->ops->view           = NULL;
224:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEPRCG;
225:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
226:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
227:   PetscFunctionReturn(PETSC_SUCCESS);
228: }