Actual source code: pipecr.c

  1: #include <petsc/private/kspimpl.h>

  3: /*
  4:      KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.

  6:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  7:      but can be called directly by KSPSetUp()
  8: */
  9: static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
 10: {
 11:   /* get work vectors needed by PIPECR */
 12:   KSPSetWorkVecs(ksp, 7);
 13:   return 0;
 14: }

 16: /*
 17:  KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
 18: */
 19: static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
 20: {
 21:   PetscInt    i;
 22:   PetscScalar alpha = 0.0, beta = 0.0, gamma, gammaold = 0.0, delta;
 23:   PetscReal   dp = 0.0;
 24:   Vec         X, B, Z, P, W, Q, U, M, N;
 25:   Mat         Amat, Pmat;
 26:   PetscBool   diagonalscale;

 28:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

 31:   X = ksp->vec_sol;
 32:   B = ksp->vec_rhs;
 33:   M = ksp->work[0];
 34:   Z = ksp->work[1];
 35:   P = ksp->work[2];
 36:   N = ksp->work[3];
 37:   W = ksp->work[4];
 38:   Q = ksp->work[5];
 39:   U = ksp->work[6];

 41:   PCGetOperators(ksp->pc, &Amat, &Pmat);

 43:   ksp->its = 0;
 44:   /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
 45:   if (!ksp->guess_zero) {
 46:     KSP_MatMult(ksp, Amat, X, W); /*     w <- b - Ax     */
 47:     VecAYPX(W, -1.0, B);
 48:   } else {
 49:     VecCopy(B, W); /*     w <- b (x is 0) */
 50:   }
 51:   KSP_PCApply(ksp, W, U); /*     u <- Bw   */

 53:   switch (ksp->normtype) {
 54:   case KSP_NORM_PRECONDITIONED:
 55:     VecNormBegin(U, NORM_2, &dp); /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
 56:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 57:     KSP_MatMult(ksp, Amat, U, W); /*     w <- Au   */
 58:     VecNormEnd(U, NORM_2, &dp);
 59:     break;
 60:   case KSP_NORM_NONE:
 61:     KSP_MatMult(ksp, Amat, U, W);
 62:     dp = 0.0;
 63:     break;
 64:   default:
 65:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 66:   }
 67:   KSPLogResidualHistory(ksp, dp);
 68:   KSPMonitor(ksp, 0, dp);
 69:   ksp->rnorm = dp;
 70:   (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /* test for convergence */
 71:   if (ksp->reason) return 0;

 73:   i = 0;
 74:   do {
 75:     KSP_PCApply(ksp, W, M); /*   m <- Bw       */

 77:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) VecNormBegin(U, NORM_2, &dp);
 78:     VecDotBegin(W, U, &gamma);
 79:     VecDotBegin(M, W, &delta);
 80:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));

 82:     KSP_MatMult(ksp, Amat, M, N); /*   n <- Am       */

 84:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) VecNormEnd(U, NORM_2, &dp);
 85:     VecDotEnd(W, U, &gamma);
 86:     VecDotEnd(M, W, &delta);

 88:     if (i > 0) {
 89:       if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
 90:       ksp->rnorm = dp;
 91:       KSPLogResidualHistory(ksp, dp);
 92:       KSPMonitor(ksp, i, dp);
 93:       (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
 94:       if (ksp->reason) return 0;
 95:     }

 97:     if (i == 0) {
 98:       alpha = gamma / delta;
 99:       VecCopy(N, Z); /*     z <- n          */
100:       VecCopy(M, Q); /*     q <- m          */
101:       VecCopy(U, P); /*     p <- u          */
102:     } else {
103:       beta  = gamma / gammaold;
104:       alpha = gamma / (delta - beta / alpha * gamma);
105:       VecAYPX(Z, beta, N); /*     z <- n + beta * z   */
106:       VecAYPX(Q, beta, M); /*     q <- m + beta * q   */
107:       VecAYPX(P, beta, U); /*     p <- u + beta * p   */
108:     }
109:     VecAXPY(X, alpha, P);  /*     x <- x + alpha * p   */
110:     VecAXPY(U, -alpha, Q); /*     u <- u - alpha * q   */
111:     VecAXPY(W, -alpha, Z); /*     w <- w - alpha * z   */
112:     gammaold = gamma;
113:     i++;
114:     ksp->its = i;

116:     /* if (i%50 == 0) { */
117:     /*   KSP_MatMult(ksp,Amat,X,W);            /\*     w <- b - Ax     *\/ */
118:     /*   VecAYPX(W,-1.0,B); */
119:     /*   KSP_PCApply(ksp,W,U); */
120:     /*   KSP_MatMult(ksp,Amat,U,W); */
121:     /* } */

123:   } while (i <= ksp->max_it);
124:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
125:   return 0;
126: }

128: /*MC
129:    KSPPIPECR - Pipelined conjugate residual method. [](sec_pipelineksp)

131:    Level: intermediate

133:    Notes:
134:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCR`.  The
135:    non-blocking reduction is overlapped by the matrix-vector product, but not the preconditioner application.

137:    See also `KSPPIPECG`, where the reduction is only overlapped with the matrix-vector product.

139:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
140:    See [](doc_faq_pipelined)

142:    Contributed by:
143:    Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

145:    Reference:
146:    P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
147:    Submitted to Parallel Computing, 2012.

149: .seealso: [](chapter_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
150: M*/

152: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
153: {
154:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
155:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

157:   ksp->ops->setup          = KSPSetUp_PIPECR;
158:   ksp->ops->solve          = KSPSolve_PIPECR;
159:   ksp->ops->destroy        = KSPDestroyDefault;
160:   ksp->ops->view           = NULL;
161:   ksp->ops->setfromoptions = NULL;
162:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
163:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
164:   return 0;
165: }