Actual source code: pipecg.c


  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
 11: {
 12:   /* get work vectors needed by PIPECG */
 13:   KSPSetWorkVecs(ksp, 9);
 14:   return 0;
 15: }

 17: /*
 18:  KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
 19: */
 20: static PetscErrorCode KSPSolve_PIPECG(KSP ksp)
 21: {
 22:   PetscInt    i;
 23:   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0;
 24:   PetscReal   dp = 0.0;
 25:   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
 26:   Mat         Amat, Pmat;
 27:   PetscBool   diagonalscale;

 29:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

 32:   X = ksp->vec_sol;
 33:   B = ksp->vec_rhs;
 34:   R = ksp->work[0];
 35:   Z = ksp->work[1];
 36:   P = ksp->work[2];
 37:   N = ksp->work[3];
 38:   W = ksp->work[4];
 39:   Q = ksp->work[5];
 40:   U = ksp->work[6];
 41:   M = ksp->work[7];
 42:   S = ksp->work[8];

 44:   PCGetOperators(ksp->pc, &Amat, &Pmat);

 46:   ksp->its = 0;
 47:   if (!ksp->guess_zero) {
 48:     KSP_MatMult(ksp, Amat, X, R); /*     r <- b - Ax     */
 49:     VecAYPX(R, -1.0, B);
 50:   } else {
 51:     VecCopy(B, R); /*     r <- b (x is 0) */
 52:   }

 54:   KSP_PCApply(ksp, R, U); /*     u <- Br   */

 56:   switch (ksp->normtype) {
 57:   case KSP_NORM_PRECONDITIONED:
 58:     VecNormBegin(U, NORM_2, &dp); /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
 59:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 60:     KSP_MatMult(ksp, Amat, U, W); /*     w <- Au   */
 61:     VecNormEnd(U, NORM_2, &dp);
 62:     break;
 63:   case KSP_NORM_UNPRECONDITIONED:
 64:     VecNormBegin(R, NORM_2, &dp); /*     dp <- r'*r = e'*A'*A*e            */
 65:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 66:     KSP_MatMult(ksp, Amat, U, W); /*     w <- Au   */
 67:     VecNormEnd(R, NORM_2, &dp);
 68:     break;
 69:   case KSP_NORM_NATURAL:
 70:     VecDotBegin(R, U, &gamma); /*     gamma <- u'*r       */
 71:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 72:     KSP_MatMult(ksp, Amat, U, W); /*     w <- Au   */
 73:     VecDotEnd(R, U, &gamma);
 74:     KSPCheckDot(ksp, gamma);
 75:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*     dp <- r'*u = r'*B*r = e'*A'*B*A*e */
 76:     break;
 77:   case KSP_NORM_NONE:
 78:     KSP_MatMult(ksp, Amat, U, W);
 79:     dp = 0.0;
 80:     break;
 81:   default:
 82:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 83:   }
 84:   KSPLogResidualHistory(ksp, dp);
 85:   KSPMonitor(ksp, 0, dp);
 86:   ksp->rnorm = dp;
 87:   (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /* test for convergence */
 88:   if (ksp->reason) return 0;

 90:   i = 0;
 91:   do {
 92:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
 93:       VecNormBegin(R, NORM_2, &dp);
 94:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
 95:       VecNormBegin(U, NORM_2, &dp);
 96:     }
 97:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotBegin(R, U, &gamma);
 98:     VecDotBegin(W, U, &delta);
 99:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));

101:     KSP_PCApply(ksp, W, M);       /*   m <- Bw       */
102:     KSP_MatMult(ksp, Amat, M, N); /*   n <- Am       */

104:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
105:       VecNormEnd(R, NORM_2, &dp);
106:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
107:       VecNormEnd(U, NORM_2, &dp);
108:     }
109:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotEnd(R, U, &gamma);
110:     VecDotEnd(W, U, &delta);

112:     if (i > 0) {
113:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
114:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

116:       ksp->rnorm = dp;
117:       KSPLogResidualHistory(ksp, dp);
118:       KSPMonitor(ksp, i, dp);
119:       (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
120:       if (ksp->reason) return 0;
121:     }

123:     if (i == 0) {
124:       alpha = gamma / delta;
125:       VecCopy(N, Z); /*     z <- n          */
126:       VecCopy(M, Q); /*     q <- m          */
127:       VecCopy(U, P); /*     p <- u          */
128:       VecCopy(W, S); /*     s <- w          */
129:     } else {
130:       beta  = gamma / gammaold;
131:       alpha = gamma / (delta - beta / alpha * gamma);
132:       VecAYPX(Z, beta, N); /*     z <- n + beta * z   */
133:       VecAYPX(Q, beta, M); /*     q <- m + beta * q   */
134:       VecAYPX(P, beta, U); /*     p <- u + beta * p   */
135:       VecAYPX(S, beta, W); /*     s <- w + beta * s   */
136:     }
137:     VecAXPY(X, alpha, P);  /*     x <- x + alpha * p   */
138:     VecAXPY(U, -alpha, Q); /*     u <- u - alpha * q   */
139:     VecAXPY(W, -alpha, Z); /*     w <- w - alpha * z   */
140:     VecAXPY(R, -alpha, S); /*     r <- r - alpha * s   */
141:     gammaold = gamma;
142:     i++;
143:     ksp->its = i;

145:     /* if (i%50 == 0) { */
146:     /*   KSP_MatMult(ksp,Amat,X,R);            /\*     w <- b - Ax     *\/ */
147:     /*   VecAYPX(R,-1.0,B); */
148:     /*   KSP_PCApply(ksp,R,U); */
149:     /*   KSP_MatMult(ksp,Amat,U,W); */
150:     /* } */

152:   } while (i <= ksp->max_it);
153:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
154:   return 0;
155: }

157: PETSC_INTERN PetscErrorCode KSPBuildResidual_CG(KSP, Vec, Vec, Vec *);

159: /*MC
160:    KSPPIPECG - Pipelined conjugate gradient method. [](sec_pipelineksp)

162:    Level: intermediate

164:    Notes:
165:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
166:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

168:    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product and `KSPGROPPCG`

170:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
171:    See [](doc_faq_pipelined)

173:    Contributed by:
174:    Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

176:    Reference:
177:    P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
178:    Submitted to Parallel Computing, 2012.

180: .seealso: [](chapter_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG2`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
181: M*/
182: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
183: {
184:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2);
185:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
186:   KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
187:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

189:   ksp->ops->setup          = KSPSetUp_PIPECG;
190:   ksp->ops->solve          = KSPSolve_PIPECG;
191:   ksp->ops->destroy        = KSPDestroyDefault;
192:   ksp->ops->view           = NULL;
193:   ksp->ops->setfromoptions = NULL;
194:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
195:   ksp->ops->buildresidual  = KSPBuildResidual_CG;
196:   return 0;
197: }