Actual source code: pipecr.c
#include <petsc/private/kspimpl.h>
/*
     KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.

     This is called once, usually automatically by KSPSolve(), but can also
     be called directly with KSPSetUp().
*/
static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
{
  PetscFunctionBegin;
  /* get work vectors needed by PIPECR */
  PetscCall(KSPSetWorkVecs(ksp, 7));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
     KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
*/
static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
{
  PetscInt    i;
  PetscScalar alpha = 0.0, beta = 0.0, gamma, gammaold = 0.0, delta;
  PetscReal   dp = 0.0;
  Vec         X, B, Z, P, W, Q, U, M, N;
  Mat         Amat, Pmat;
  PetscBool   diagonalscale;

  PetscFunctionBegin;
  PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
  PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

  X = ksp->vec_sol;
  B = ksp->vec_rhs;
  M = ksp->work[0];
  Z = ksp->work[1];
  P = ksp->work[2];
  N = ksp->work[3];
  W = ksp->work[4];
  Q = ksp->work[5];
  U = ksp->work[6];

  PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
  ksp->its = 0;
  /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
  if (!ksp->guess_zero) {
    PetscCall(KSP_MatMult(ksp, Amat, X, W)); /*     w <- b - Ax     */
    PetscCall(VecAYPX(W, -1.0, B));
  } else {
    PetscCall(VecCopy(B, W)); /*     w <- b (x is 0) */
  }
  PetscCall(KSP_PCApply(ksp, W, U)); /*     u <- Bw   */

  switch (ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    PetscCall(VecNormBegin(U, NORM_2, &dp)); /*     dp <- u'*u = e'*A'*B'*B*A*e     */
    PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
    PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*     w <- Au   */
    PetscCall(VecNormEnd(U, NORM_2, &dp));
    break;
  case KSP_NORM_NONE:
    PetscCall(KSP_MatMult(ksp, Amat, U, W));
    dp = 0.0;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
  }
  KSPLogResidualHistory(ksp, dp);
  PetscCall(KSPMonitor(ksp, 0, dp));
  ksp->rnorm = dp;
  PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
  if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
  i = 0;
  do {
    PetscCall(KSP_PCApply(ksp, W, M)); /*   m <- Bw       */

    if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) PetscCall(VecNormBegin(U, NORM_2, &dp));
    PetscCall(VecDotBegin(W, U, &gamma));
    PetscCall(VecDotBegin(M, W, &delta));
    /* start the non-blocking reduction for dp, gamma, and delta; it is overlapped by the matrix-vector product below */
    PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));

    PetscCall(KSP_MatMult(ksp, Amat, M, N)); /*   n <- Am   */

    if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) PetscCall(VecNormEnd(U, NORM_2, &dp));
    PetscCall(VecDotEnd(W, U, &gamma));
    PetscCall(VecDotEnd(M, W, &delta));

    if (i > 0) {
      if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
      ksp->rnorm = dp;
      KSPLogResidualHistory(ksp, dp);
      PetscCall(KSPMonitor(ksp, i, dp));
      PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
      if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
    }

    if (i == 0) {
      alpha = gamma / delta;
      PetscCall(VecCopy(N, Z)); /*     z <- n           */
      PetscCall(VecCopy(M, Q)); /*     q <- m           */
      PetscCall(VecCopy(U, P)); /*     p <- u           */
    } else {
      beta  = gamma / gammaold;
      alpha = gamma / (delta - beta / alpha * gamma);
      PetscCall(VecAYPX(Z, beta, N)); /*     z <- n + beta * z   */
      PetscCall(VecAYPX(Q, beta, M)); /*     q <- m + beta * q   */
      PetscCall(VecAYPX(P, beta, U)); /*     p <- u + beta * p   */
    }
    PetscCall(VecAXPY(X, alpha, P));  /*     x <- x + alpha * p   */
    PetscCall(VecAXPY(U, -alpha, Q)); /*     u <- u - alpha * q   */
    PetscCall(VecAXPY(W, -alpha, Z)); /*     w <- w - alpha * z   */
    gammaold = gamma;
    i++;
    ksp->its = i;
    /* if (i%50 == 0) { */
    /*   KSP_MatMult(ksp,Amat,X,W); /\* w <- b - Ax *\/ */
    /*   VecAYPX(W,-1.0,B); */
    /*   KSP_PCApply(ksp,W,U); */
    /*   KSP_MatMult(ksp,Amat,U,W); */
    /* } */
  } while (i <= ksp->max_it);
  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*MC
   KSPPIPECR - Pipelined conjugate residual method. [](sec_pipelineksp)

   Level: intermediate

   Notes:
   This method has only a single non-blocking reduction per iteration, compared to two blocking reductions for the standard `KSPCR` method. The
   non-blocking reduction is overlapped with the matrix-vector product, but not with the preconditioner application.

   See also `KSPPIPECG`, where the reduction is only overlapped with the matrix-vector product.
   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
   See [](doc_faq_pipelined).
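   For example, with an MPICH-based MPI implementation asynchronous progress can often be enabled through the environment.
   This is a sketch, not a universal recipe; the exact switch varies by MPI implementation (and the application name here
   is a placeholder), so consult your MPI's documentation:
.vb
   export MPICH_ASYNC_PROGRESS=1
   mpiexec -n 4 ./my_app -ksp_type pipecr
.ve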
   Contributed by:
   Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
   Reference:
   P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
   Parallel Computing, 40(7):224-238, 2014.
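   Example Usage:
   A minimal sketch of selecting this method from application code; it assumes a matrix A and vectors b and x have
   already been created and assembled, and omits error checking with PetscCall() for brevity:
.vb
   KSP ksp;
   KSPCreate(PETSC_COMM_WORLD, &ksp);
   KSPSetOperators(ksp, A, A);
   KSPSetType(ksp, KSPPIPECR);
   KSPSetFromOptions(ksp);
   KSPSolve(ksp, b, x);
   KSPDestroy(&ksp);
.ve
   Equivalently, since KSPSetFromOptions() is called, the method can be selected at run time with -ksp_type pipecr.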
.seealso: [](chapter_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
M*/
PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
{
  PetscFunctionBegin;
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

  ksp->ops->setup          = KSPSetUp_PIPECR;
  ksp->ops->solve          = KSPSolve_PIPECR;
  ksp->ops->destroy        = KSPDestroyDefault;
  ksp->ops->view           = NULL;
  ksp->ops->setfromoptions = NULL;
  ksp->ops->buildsolution  = KSPBuildSolutionDefault;
  ksp->ops->buildresidual  = KSPBuildResidualDefault;
  PetscFunctionReturn(PETSC_SUCCESS);
}