Actual source code: pipecr.c

```  1: #include <petsc/private/kspimpl.h>

3: /*
4:      KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.

6:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
7:      but can be called directly by KSPSetUp()
8: */
9: static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
10: {
11:   /* get work vectors needed by PIPECR */
12:   KSPSetWorkVecs(ksp, 7);
13:   return 0;
14: }

16: /*
17:  KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
18: */
19: static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
20: {
21:   PetscInt    i;
22:   PetscScalar alpha = 0.0, beta = 0.0, gamma, gammaold = 0.0, delta;
23:   PetscReal   dp = 0.0;
24:   Vec         X, B, Z, P, W, Q, U, M, N;
25:   Mat         Amat, Pmat;
26:   PetscBool   diagonalscale;

28:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

31:   X = ksp->vec_sol;
32:   B = ksp->vec_rhs;
33:   M = ksp->work[0];
34:   Z = ksp->work[1];
35:   P = ksp->work[2];
36:   N = ksp->work[3];
37:   W = ksp->work[4];
38:   Q = ksp->work[5];
39:   U = ksp->work[6];

41:   PCGetOperators(ksp->pc, &Amat, &Pmat);

43:   ksp->its = 0;
44:   /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
45:   if (!ksp->guess_zero) {
46:     KSP_MatMult(ksp, Amat, X, W); /*     w <- b - Ax     */
47:     VecAYPX(W, -1.0, B);
48:   } else {
49:     VecCopy(B, W); /*     w <- b (x is 0) */
50:   }
51:   KSP_PCApply(ksp, W, U); /*     u <- Bw   */

53:   switch (ksp->normtype) {
54:   case KSP_NORM_PRECONDITIONED:
55:     VecNormBegin(U, NORM_2, &dp); /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
56:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
57:     KSP_MatMult(ksp, Amat, U, W); /*     w <- Au   */
58:     VecNormEnd(U, NORM_2, &dp);
59:     break;
60:   case KSP_NORM_NONE:
61:     KSP_MatMult(ksp, Amat, U, W);
62:     dp = 0.0;
63:     break;
64:   default:
65:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
66:   }
67:   KSPLogResidualHistory(ksp, dp);
68:   KSPMonitor(ksp, 0, dp);
69:   ksp->rnorm = dp;
70:   (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /* test for convergence */
71:   if (ksp->reason) return 0;

73:   i = 0;
74:   do {
75:     KSP_PCApply(ksp, W, M); /*   m <- Bw       */

77:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) VecNormBegin(U, NORM_2, &dp);
78:     VecDotBegin(W, U, &gamma);
79:     VecDotBegin(M, W, &delta);
80:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));

82:     KSP_MatMult(ksp, Amat, M, N); /*   n <- Am       */

84:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) VecNormEnd(U, NORM_2, &dp);
85:     VecDotEnd(W, U, &gamma);
86:     VecDotEnd(M, W, &delta);

88:     if (i > 0) {
89:       if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
90:       ksp->rnorm = dp;
91:       KSPLogResidualHistory(ksp, dp);
92:       KSPMonitor(ksp, i, dp);
93:       (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
94:       if (ksp->reason) return 0;
95:     }

97:     if (i == 0) {
98:       alpha = gamma / delta;
99:       VecCopy(N, Z); /*     z <- n          */
100:       VecCopy(M, Q); /*     q <- m          */
101:       VecCopy(U, P); /*     p <- u          */
102:     } else {
103:       beta  = gamma / gammaold;
104:       alpha = gamma / (delta - beta / alpha * gamma);
105:       VecAYPX(Z, beta, N); /*     z <- n + beta * z   */
106:       VecAYPX(Q, beta, M); /*     q <- m + beta * q   */
107:       VecAYPX(P, beta, U); /*     p <- u + beta * p   */
108:     }
109:     VecAXPY(X, alpha, P);  /*     x <- x + alpha * p   */
110:     VecAXPY(U, -alpha, Q); /*     u <- u - alpha * q   */
111:     VecAXPY(W, -alpha, Z); /*     w <- w - alpha * z   */
112:     gammaold = gamma;
113:     i++;
114:     ksp->its = i;

116:     /* if (i%50 == 0) { */
117:     /*   KSP_MatMult(ksp,Amat,X,W);            /\*     w <- b - Ax     *\/ */
118:     /*   VecAYPX(W,-1.0,B); */
119:     /*   KSP_PCApply(ksp,W,U); */
120:     /*   KSP_MatMult(ksp,Amat,U,W); */
121:     /* } */

123:   } while (i <= ksp->max_it);
124:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
125:   return 0;
126: }

128: /*MC
129:    KSPPIPECR - Pipelined conjugate residual method. [](sec_pipelineksp)

131:    Level: intermediate

133:    Notes:
134:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCR`.  The
135:    non-blocking reduction is overlapped by the matrix-vector product, but not the preconditioner application.

137:    See also `KSPPIPECG`, where the reduction is only overlapped with the matrix-vector product.

139:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
140:    See [](doc_faq_pipelined)

142:    Contributed by:
143:    Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

145:    Reference:
146:    P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
147:    Submitted to Parallel Computing, 2012.

149: .seealso: [](chapter_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
150: M*/

152: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
153: {
154:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
155:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

157:   ksp->ops->setup          = KSPSetUp_PIPECR;
158:   ksp->ops->solve          = KSPSolve_PIPECR;
159:   ksp->ops->destroy        = KSPDestroyDefault;
160:   ksp->ops->view           = NULL;
161:   ksp->ops->setfromoptions = NULL;
162:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
163:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
164:   return 0;
165: }
```