Actual source code: pipecg.c
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
11: {
12: /* get work vectors needed by PIPECG */
13: KSPSetWorkVecs(ksp, 9);
14: return 0;
15: }
17: /*
18: KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
19: */
20: static PetscErrorCode KSPSolve_PIPECG(KSP ksp)
21: {
22: PetscInt i;
23: PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0;
24: PetscReal dp = 0.0;
25: Vec X, B, Z, P, W, Q, U, M, N, R, S;
26: Mat Amat, Pmat;
27: PetscBool diagonalscale;
29: PCGetDiagonalScale(ksp->pc, &diagonalscale);
32: X = ksp->vec_sol;
33: B = ksp->vec_rhs;
34: R = ksp->work[0];
35: Z = ksp->work[1];
36: P = ksp->work[2];
37: N = ksp->work[3];
38: W = ksp->work[4];
39: Q = ksp->work[5];
40: U = ksp->work[6];
41: M = ksp->work[7];
42: S = ksp->work[8];
44: PCGetOperators(ksp->pc, &Amat, &Pmat);
46: ksp->its = 0;
47: if (!ksp->guess_zero) {
48: KSP_MatMult(ksp, Amat, X, R); /* r <- b - Ax */
49: VecAYPX(R, -1.0, B);
50: } else {
51: VecCopy(B, R); /* r <- b (x is 0) */
52: }
54: KSP_PCApply(ksp, R, U); /* u <- Br */
56: switch (ksp->normtype) {
57: case KSP_NORM_PRECONDITIONED:
58: VecNormBegin(U, NORM_2, &dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
59: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
60: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
61: VecNormEnd(U, NORM_2, &dp);
62: break;
63: case KSP_NORM_UNPRECONDITIONED:
64: VecNormBegin(R, NORM_2, &dp); /* dp <- r'*r = e'*A'*A*e */
65: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
66: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
67: VecNormEnd(R, NORM_2, &dp);
68: break;
69: case KSP_NORM_NATURAL:
70: VecDotBegin(R, U, &gamma); /* gamma <- u'*r */
71: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
72: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
73: VecDotEnd(R, U, &gamma);
74: KSPCheckDot(ksp, gamma);
75: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
76: break;
77: case KSP_NORM_NONE:
78: KSP_MatMult(ksp, Amat, U, W);
79: dp = 0.0;
80: break;
81: default:
82: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
83: }
84: KSPLogResidualHistory(ksp, dp);
85: KSPMonitor(ksp, 0, dp);
86: ksp->rnorm = dp;
87: (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /* test for convergence */
88: if (ksp->reason) return 0;
90: i = 0;
91: do {
92: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
93: VecNormBegin(R, NORM_2, &dp);
94: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
95: VecNormBegin(U, NORM_2, &dp);
96: }
97: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotBegin(R, U, &gamma);
98: VecDotBegin(W, U, &delta);
99: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
101: KSP_PCApply(ksp, W, M); /* m <- Bw */
102: KSP_MatMult(ksp, Amat, M, N); /* n <- Am */
104: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
105: VecNormEnd(R, NORM_2, &dp);
106: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
107: VecNormEnd(U, NORM_2, &dp);
108: }
109: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotEnd(R, U, &gamma);
110: VecDotEnd(W, U, &delta);
112: if (i > 0) {
113: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
114: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
116: ksp->rnorm = dp;
117: KSPLogResidualHistory(ksp, dp);
118: KSPMonitor(ksp, i, dp);
119: (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
120: if (ksp->reason) return 0;
121: }
123: if (i == 0) {
124: alpha = gamma / delta;
125: VecCopy(N, Z); /* z <- n */
126: VecCopy(M, Q); /* q <- m */
127: VecCopy(U, P); /* p <- u */
128: VecCopy(W, S); /* s <- w */
129: } else {
130: beta = gamma / gammaold;
131: alpha = gamma / (delta - beta / alpha * gamma);
132: VecAYPX(Z, beta, N); /* z <- n + beta * z */
133: VecAYPX(Q, beta, M); /* q <- m + beta * q */
134: VecAYPX(P, beta, U); /* p <- u + beta * p */
135: VecAYPX(S, beta, W); /* s <- w + beta * s */
136: }
137: VecAXPY(X, alpha, P); /* x <- x + alpha * p */
138: VecAXPY(U, -alpha, Q); /* u <- u - alpha * q */
139: VecAXPY(W, -alpha, Z); /* w <- w - alpha * z */
140: VecAXPY(R, -alpha, S); /* r <- r - alpha * s */
141: gammaold = gamma;
142: i++;
143: ksp->its = i;
145: /* if (i%50 == 0) { */
146: /* KSP_MatMult(ksp,Amat,X,R); /\* w <- b - Ax *\/ */
147: /* VecAYPX(R,-1.0,B); */
148: /* KSP_PCApply(ksp,R,U); */
149: /* KSP_MatMult(ksp,Amat,U,W); */
150: /* } */
152: } while (i <= ksp->max_it);
153: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
154: return 0;
155: }
157: PETSC_INTERN PetscErrorCode KSPBuildResidual_CG(KSP, Vec, Vec, Vec *);
159: /*MC
160: KSPPIPECG - Pipelined conjugate gradient method. [](sec_pipelineksp)
162: Level: intermediate
164: Notes:
165: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG. The
166: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
168: See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product and `KSPGROPPCG`
170: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
171: See [](doc_faq_pipelined)
173: Contributed by:
174: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
176: Reference:
177: P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
178: Submitted to Parallel Computing, 2012.
180: .seealso: [](chapter_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG2`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
181: M*/
182: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
183: {
184: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2);
185: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
186: KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
187: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);
189: ksp->ops->setup = KSPSetUp_PIPECG;
190: ksp->ops->solve = KSPSolve_PIPECG;
191: ksp->ops->destroy = KSPDestroyDefault;
192: ksp->ops->view = NULL;
193: ksp->ops->setfromoptions = NULL;
194: ksp->ops->buildsolution = KSPBuildSolutionDefault;
195: ksp->ops->buildresidual = KSPBuildResidual_CG;
196: return 0;
197: }