Actual source code: pcmpi.c

  1: /*
  2:     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
  3:     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

  5:     The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.

  7:     The code below uses MPI_COMM_WORLD, but perhaps it should use PETSC_COMM_WORLD.

  9:     The resulting KSP and PC can only be controlled via the options database, though some common commands
 10:     could be passed through the server.

 12: */
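/*
    A minimal usage sketch (hypothetical application code, not part of this file): the calling
    program looks entirely sequential, building its matrix and right-hand side on PETSC_COMM_SELF,
    and is launched with, for example,

        mpiexec -n 4 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view

    Only MPI rank 0 executes main(); the other ranks enter the server loop in PCMPIServerBegin()
    during PetscInitialize() and wait for solve requests.

        #include <petscksp.h>
        int main(int argc, char **argv)
        {
          Mat A;    // sequential MATSEQAIJ matrix assembled on rank 0
          Vec b, x; // sequential right-hand side and solution
          KSP ksp;

          PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
          // ... create and fill A, b, and x on PETSC_COMM_SELF (possibly using OpenMP) ...
          PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
          PetscCall(KSPSetOperators(ksp, A, A));
          PetscCall(KSPSetFromOptions(ksp)); // under -mpi_linear_solver_server the outer KSP becomes KSPPREONLY + PCMPI (see the PCMPI manual page below)
          PetscCall(KSPSolve(ksp, b, x));    // the actual solve runs in parallel on the server ranks
          PetscCall(KSPDestroy(&ksp));
          PetscCall(MatDestroy(&A));
          PetscCall(VecDestroy(&b));
          PetscCall(VecDestroy(&x));
          PetscCall(PetscFinalize()); // rank 0 signals the server ranks to exit via PCMPIServerEnd()
          return 0;
        }
*/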
 13: #include <petsc/private/pcimpl.h>
 14: #include <petsc/private/kspimpl.h>
 15: #include <petscts.h>
 16: #include <petsctao.h>

 18: #define PC_MPI_MAX_RANKS  256
 19: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD

 21: typedef struct {
 22:   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
 23:   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
 24:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
 25:   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
 26:   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
 27: } PC_MPI;

 29: typedef enum {
 30:   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
 31:   PCMPI_CREATE,
 32:   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
 33:   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
 34:   PCMPI_SOLVE,
 35:   PCMPI_VIEW,
 36:   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
 37: } PCMPICommand;
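/*
   A sketch (using only names defined in this file) of the rank-0-driven protocol behind these
   commands: rank 0 broadcasts a PCMPICommand and then calls the matching PCMPIXxx() routine with
   its PC, while the other ranks, spinning in PCMPIServerBegin(), receive the broadcast and call
   the same routine with a NULL PC, for example

       PCMPICommand request = PCMPI_SET_MAT;
       PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); // on rank 0
       PetscCall(PCMPISetMat(pc));                                         // rank 0 passes its PC
       // ranks != 0 execute PetscCall(PCMPISetMat(NULL)) from the server loop
*/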

 39: static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
 40: static PetscBool PCMPICommSet = PETSC_FALSE;
 41: static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
 42: static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

 44: static PetscErrorCode PCMPICommsCreate(void)
 45: {
 46:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 47:   PetscMPIInt size, rank, i;

 49:   PetscFunctionBegin;
 50:   PetscCallMPI(MPI_Comm_size(comm, &size));
 51:   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
 52:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 53:   /* comm for size 1 is useful only for debugging */
 54:   for (i = 0; i < size; i++) {
 55:     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
 56:     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
 57:     PCMPISolveCounts[i] = 0;
 58:     PCMPIKSPCounts[i]   = 0;
 59:     PCMPIIterations[i]  = 0;
 60:     PCMPISizes[i]       = 0;
 61:   }
 62:   PCMPICommSet = PETSC_TRUE;
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }
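/*
   Illustration (not executed) of what the split above produces with 4 ranks in PC_MPI_COMM_WORLD:
   nested subcommunicators containing the lowest-numbered ranks,

       PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1}, PCMPIComms[2] = {0,1,2}, PCMPIComms[3] = {0,1,2,3}

   and on any rank r > i the value of PCMPIComms[i] is MPI_COMM_NULL, because that rank passed
   MPI_UNDEFINED as its color to MPI_Comm_split().
*/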

 66: PetscErrorCode PCMPICommsDestroy(void)
 67: {
 68:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 69:   PetscMPIInt size, rank, i;

 71:   PetscFunctionBegin;
 72:   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
 73:   PetscCallMPI(MPI_Comm_size(comm, &size));
 74:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 75:   for (i = 0; i < size; i++) {
 76:     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
 77:   }
 78:   PCMPICommSet = PETSC_FALSE;
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: static PetscErrorCode PCMPICreate(PC pc)
 83: {
 84:   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
 85:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 86:   KSP         ksp;
 87:   PetscInt    N[2], mincntperrank = 0;
 88:   PetscMPIInt size;
 89:   Mat         sA;
 90:   char       *cprefix = NULL;
 91:   PetscMPIInt len     = 0;

 93:   PetscFunctionBegin;
 94:   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
 95:   PetscCallMPI(MPI_Comm_size(comm, &size));
 96:   if (pc) {
 97:     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
 98:     PetscCall(PCGetOperators(pc, &sA, &sA));
 99:     PetscCall(MatGetSize(sA, &N[0], &N[1]));
100:   }
101:   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

103:   /* choose a suitable sized MPI_Comm for the problem to be solved on */
104:   if (km) mincntperrank = km->mincntperrank;
105:   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
106:   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
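  /*
     Worked example of the selection above (assuming the default mincntperrank of 10000 set in PCCreate_MPI()):
     with N[0] = 50000 rows on a 16-rank run, PetscMin(16, PetscMax(1, 50000 / 10000)) - 1 = 4, so the
     5-rank subcommunicator {0,...,4} is used; with N[0] = 5000 the index is 0, so only rank 0 participates
     and every other rank takes the MPI_COMM_NULL branch below, sets ksp = NULL, and returns early.
  */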
107:   if (comm == MPI_COMM_NULL) {
108:     ksp = NULL;
109:     PetscFunctionReturn(PETSC_SUCCESS);
110:   }
111:   PetscCall(PetscLogStagePush(PCMPIStage));
112:   PetscCall(KSPCreate(comm, &ksp));
113:   PetscCall(KSPSetNestLevel(ksp, 1));
114:   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
115:   PetscCall(PetscLogStagePop());
116:   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
117:   if (pc) {
118:     size_t      slen;
119:     const char *prefix = NULL;
120:     char       *found  = NULL;

122:     PetscCallMPI(MPI_Comm_size(comm, &size));
123:     PCMPIKSPCounts[size - 1]++;
124:     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
125:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
126:     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
127:     PetscCall(PetscStrallocpy(prefix, &cprefix));
128:     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
129:     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
130:     *found = 0;
131:     PetscCall(PetscStrlen(cprefix, &slen));
132:     len = (PetscMPIInt)slen;
133:   }
134:   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
135:   if (len) {
136:     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
137:     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
138:     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
139:   }
140:   PetscCall(PetscFree(cprefix));
141:   PetscFunctionReturn(PETSC_SUCCESS);
142: }

144: static PetscErrorCode PCMPISetMat(PC pc)
145: {
146:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
147:   Mat                A;
148:   PetscInt           m, n, *ia, *ja, j, bs;
149:   Mat                sA;
150:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
151:   KSP                ksp;
152:   PetscLayout        layout;
153:   const PetscInt    *IA = NULL, *JA = NULL;
154:   const PetscInt    *range;
155:   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
156:   PetscScalar       *a;
157:   const PetscScalar *sa               = NULL;
158:   PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
159:   char              *cprefix;

161:   PetscFunctionBegin;
162:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
163:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
164:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
165:   if (pc) {
166:     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
167:     const char *prefix;
168:     size_t      clen;

170:     PetscCallMPI(MPI_Comm_size(comm, &size));
171:     PCMPIMatCounts[size - 1]++;
172:     PetscCall(PCGetOperators(pc, &sA, &sA));
173:     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
174:     PetscCall(MatGetBlockSize(sA, &bs));
175:     matproperties[2] = bs;
176:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
177:     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
178:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
179:     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
180:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
181:     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
182:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
183:     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
184:     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
185:     PetscCall(MatGetOptionsPrefix(sA, &prefix));
186:     PetscCall(PetscStrallocpy(prefix, &cprefix));
187:     PetscCall(PetscStrlen(cprefix, &clen));
188:     matproperties[7] = (PetscInt)clen;
189:   }
190:   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

192:   /* determine ownership ranges of matrix columns */
193:   PetscCall(PetscLayoutCreate(comm, &layout));
194:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
195:   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
196:   PetscCall(PetscLayoutSetUp(layout));
197:   PetscCall(PetscLayoutGetLocalSize(layout, &n));
198:   PetscCall(PetscLayoutDestroy(&layout));

200:   /* determine ownership ranges of matrix rows */
201:   PetscCall(PetscLayoutCreate(comm, &layout));
202:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
203:   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
204:   PetscCall(PetscLayoutSetUp(layout));
205:   PetscCall(PetscLayoutGetLocalSize(layout, &m));

207:   /* copy over the matrix nonzero structure and values */
208:   if (pc) {
209:     NZ      = km->NZ;
210:     NZdispl = km->NZdispl;
211:     PetscCall(PetscLayoutGetRanges(layout, &range));
212:     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
213:     for (i = 0; i < size; i++) {
214:       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
215:       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
216:     }
217:     displi[0]  = 0;
218:     NZdispl[0] = 0;
219:     for (j = 1; j < size; j++) {
220:       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
221:       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
222:     }
223:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
224:   }
225:   PetscCall(PetscLayoutDestroy(&layout));
226:   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

228:   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
229:   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
230:   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
231:   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

233:   if (pc) {
234:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
235:     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
236:   }

238:   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
239:   ia[0] = 0;
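  /*
     Worked example of the scatter above (small hypothetical case): a 4 x 4 sequential matrix with
     row pointers IA = {0,2,5,7,9} (9 nonzeros), split over two server ranks owning 2 rows each
     (range = {0,2,4}), gives on rank 0

         sendcounti = {3,3}, displi = {0,2}, NZ = {5,4}, NZdispl = {0,5}

     Rank 1 then receives ia = {5,7,9} plus its 4 column indices and values, and the shift just
     performed turns ia into the local row pointer {0,2,4} expected by MatMPIAIJSetPreallocationCSR() below.
  */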
240:   PetscCall(PetscLogStagePush(PCMPIStage));
241:   PetscCall(MatCreate(comm, &A));
242:   if (matproperties[7] > 0) {
243:     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
244:     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
245:     PetscCall(MatSetOptionsPrefix(A, cprefix));
246:     PetscCall(PetscFree(cprefix));
247:   }
248:   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
249:   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
250:   PetscCall(MatSetType(A, MATMPIAIJ));
251:   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
252:   PetscCall(MatSetBlockSize(A, matproperties[2]));

254:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
255:   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
256:   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
257:   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

259:   PetscCall(PetscFree3(ia, ja, a));
260:   PetscCall(KSPSetOperators(ksp, A, A));
261:   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
262:   PetscCall(PetscLogStagePop());
263:   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
264:     const PetscInt *range;

266:     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
267:     for (i = 0; i < size; i++) {
268:       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
269:       km->displ[i]     = (PetscMPIInt)range[i];
270:     }
271:   }
272:   PetscCall(MatDestroy(&A));
273:   PetscCall(KSPSetFromOptions(ksp));
274:   PetscFunctionReturn(PETSC_SUCCESS);
275: }

277: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
278: {
279:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
280:   KSP                ksp;
281:   Mat                sA, A;
282:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
283:   PetscScalar       *a;
284:   PetscCount         nz;
285:   const PetscScalar *sa = NULL;
286:   PetscMPIInt        size;
287:   PetscInt           matproperties[4] = {0, 0, 0, 0};

289:   PetscFunctionBegin;
290:   if (pc) {
291:     PetscCall(PCGetOperators(pc, &sA, &sA));
292:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
293:   }
294:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
295:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
296:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
297:   PetscCallMPI(MPI_Comm_size(comm, &size));
298:   PCMPIMatCounts[size - 1]++;
299:   PetscCall(KSPGetOperators(ksp, NULL, &A));
300:   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
301:   PetscCall(PetscMalloc1(nz, &a));
302:   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
303:   if (pc) {
304:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

306:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

308:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
309:     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
310:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
311:     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
312:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
313:     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
314:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
315:     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
316:   }
317:   PetscCall(MatUpdateMPIAIJWithArray(A, a));
318:   PetscCall(PetscFree(a));
319:   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
320:   /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
321:   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
322:   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
323:   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
324:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
325:   PetscFunctionReturn(PETSC_SUCCESS);
326: }

328: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
329: {
330:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
331:   KSP                ksp;
332:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
333:   const PetscScalar *sb   = NULL, *x;
334:   PetscScalar       *b, *sx = NULL;
335:   PetscInt           its, n;
336:   PetscMPIInt        size;

338:   PetscFunctionBegin;
339:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
340:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
341:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

343:   /* TODO: optimize code to not require building counts/displ every time */

345:   /* scatterv rhs */
346:   PetscCallMPI(MPI_Comm_size(comm, &size));
347:   if (pc) {
348:     PetscInt N;

350:     PCMPISolveCounts[size - 1]++;
351:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
352:     PCMPISizes[size - 1] += N;
353:     PetscCall(VecGetArrayRead(B, &sb));
354:   }
355:   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
356:   PetscCall(VecGetArray(ksp->vec_rhs, &b));
357:   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
358:   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
359:   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

361:   PetscCall(PetscLogStagePush(PCMPIStage));
362:   PetscCall(KSPSolve(ksp, NULL, NULL));
363:   PetscCall(PetscLogStagePop());
364:   PetscCall(KSPGetIterationNumber(ksp, &its));
365:   PCMPIIterations[size - 1] += its;

367:   /* gather solution */
368:   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
369:   if (pc) PetscCall(VecGetArray(X, &sx));
370:   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
371:   if (pc) PetscCall(VecRestoreArray(X, &sx));
372:   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
373:   PetscFunctionReturn(PETSC_SUCCESS);
374: }

376: static PetscErrorCode PCMPIDestroy(PC pc)
377: {
378:   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
379:   KSP      ksp;
380:   MPI_Comm comm = PC_MPI_COMM_WORLD;

382:   PetscFunctionBegin;
383:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
384:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
385:   PetscCall(PetscLogStagePush(PCMPIStage));
386:   PetscCall(KSPDestroy(&ksp));
387:   PetscCall(PetscLogStagePop());
388:   PetscFunctionReturn(PETSC_SUCCESS);
389: }

391: PetscBool PCMPIServerActive = PETSC_FALSE;

393: /*@C
394:   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
395:   parallel `KSP` solves and management of parallel `KSP` objects.

397:   Logically Collective on all MPI processes except rank 0

399:   Options Database Keys:
400: + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
401: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

403:   Level: developer

405:   Note:
406:   This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

408:   See `PCMPI` for information on using the solver with a `KSP` object

410:   Developer Notes:
411:   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
412:   written with `PETSC_COMM_WORLD` to run correctly on that single rank, while all the other ranks
413:   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

415:   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

417:   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

419:   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

421:   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
422:   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

424:   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
425:   all MPI processes but the user callback only runs on one MPI process per node.

427:   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
428:   the `MPI_Comm` argument from PETSc calls.

430: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
431: @*/
432: PetscErrorCode PCMPIServerBegin(void)
433: {
434:   PetscMPIInt rank;

436:   PetscFunctionBegin;
437:   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
438:   if (PetscDefined(USE_SINGLE_LIBRARY)) {
439:     PetscCall(VecInitializePackage());
440:     PetscCall(MatInitializePackage());
441:     PetscCall(DMInitializePackage());
442:     PetscCall(PCInitializePackage());
443:     PetscCall(KSPInitializePackage());
444:     PetscCall(SNESInitializePackage());
445:     PetscCall(TSInitializePackage());
446:     PetscCall(TaoInitializePackage());
447:   }
448:   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));

450:   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
451:   if (rank == 0) {
452:     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
453:     PCMPIServerActive = PETSC_TRUE;
454:     PetscFunctionReturn(PETSC_SUCCESS);
455:   }

457:   while (PETSC_TRUE) {
458:     PCMPICommand request = PCMPI_CREATE;
459:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
460:     switch (request) {
461:     case PCMPI_CREATE:
462:       PetscCall(PCMPICreate(NULL));
463:       break;
464:     case PCMPI_SET_MAT:
465:       PetscCall(PCMPISetMat(NULL));
466:       break;
467:     case PCMPI_UPDATE_MAT_VALUES:
468:       PetscCall(PCMPIUpdateMatValues(NULL));
469:       break;
470:     case PCMPI_VIEW:
471:       // PetscCall(PCMPIView(NULL));
472:       break;
473:     case PCMPI_SOLVE:
474:       PetscCall(PCMPISolve(NULL, NULL, NULL));
475:       break;
476:     case PCMPI_DESTROY:
477:       PetscCall(PCMPIDestroy(NULL));
478:       break;
479:     case PCMPI_EXIT:
480:       PetscCall(PetscFinalize());
481:       exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
482:       break;
483:     default:
484:       break;
485:     }
486:   }
487:   PetscFunctionReturn(PETSC_SUCCESS);
488: }

490: /*@C
491:   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
492:   parallel `KSP` solves and management of parallel `KSP` objects.

494:   Logically Collective on all MPI ranks except 0

496:   Level: developer

498:   Note:
499:   This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` has been provided

501: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
502: @*/
503: PetscErrorCode PCMPIServerEnd(void)
504: {
505:   PCMPICommand request = PCMPI_EXIT;

507:   PetscFunctionBegin;
508:   if (PetscGlobalRank == 0) {
509:     PetscViewer       viewer = NULL;
510:     PetscViewerFormat format;

512:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
513:     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
514:     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
515:     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL));
516:     PetscOptionsEnd();
517:     if (viewer) {
518:       PetscBool isascii;

520:       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
521:       if (isascii) {
522:         PetscMPIInt size;
523:         PetscMPIInt i;

525:         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
526:         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
527:         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
528:         if (PCMPIKSPCountsSeq) {
529:           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
530:         }
531:         for (i = 0; i < size; i++) {
532:           if (PCMPIKSPCounts[i]) {
533:             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
534:           }
535:         }
536:       }
537:       PetscCall(PetscOptionsRestoreViewer(&viewer));
538:     }
539:   }
540:   PetscCall(PCMPICommsDestroy());
541:   PCMPIServerActive = PETSC_FALSE;
542:   PetscFunctionReturn(PETSC_SUCCESS);
543: }

545: /*
546:     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
547:     because, for example, the problem is small. This version is more efficient because it does not require copying any data
548: */
549: static PetscErrorCode PCSetUp_Seq(PC pc)
550: {
551:   PC_MPI     *km = (PC_MPI *)pc->data;
552:   Mat         sA;
553:   const char *prefix;
554:   char       *found = NULL, *cprefix;

556:   PetscFunctionBegin;
557:   PetscCall(PCGetOperators(pc, NULL, &sA));
558:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
559:   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
560:   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
561:   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

563:   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
564:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
565:   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
566:   PetscCall(PetscStrallocpy(prefix, &cprefix));
567:   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
568:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
569:   *found = 0;
570:   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
571:   PetscCall(PetscFree(cprefix));

573:   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
574:   PetscCall(KSPSetFromOptions(km->ksps[0]));
575:   PetscCall(KSPSetUp(km->ksps[0]));
576:   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
577:   PCMPIKSPCountsSeq++;
578:   PetscFunctionReturn(PETSC_SUCCESS);
579: }

581: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
582: {
583:   PC_MPI  *km = (PC_MPI *)pc->data;
584:   PetscInt its, n;
585:   Mat      A;

587:   PetscFunctionBegin;
588:   PetscCall(KSPSolve(km->ksps[0], b, x));
589:   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
590:   PCMPISolveCountsSeq++;
591:   PCMPIIterationsSeq += its;
592:   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
593:   PetscCall(MatGetSize(A, &n, NULL));
594:   PCMPISizesSeq += n;
595:   PetscFunctionReturn(PETSC_SUCCESS);
596: }

598: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
599: {
600:   PC_MPI *km = (PC_MPI *)pc->data;

602:   PetscFunctionBegin;
603:   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
604:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
605:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
606:   PetscFunctionReturn(PETSC_SUCCESS);
607: }

609: static PetscErrorCode PCDestroy_Seq(PC pc)
610: {
611:   PC_MPI *km = (PC_MPI *)pc->data;

613:   PetscFunctionBegin;
614:   PetscCall(KSPDestroy(&km->ksps[0]));
615:   PetscCall(PetscFree(pc->data));
616:   PetscFunctionReturn(PETSC_SUCCESS);
617: }

619: /*
620:      PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
621:      right-hand side to the parallel PC
622: */
623: static PetscErrorCode PCSetUp_MPI(PC pc)
624: {
625:   PC_MPI      *km = (PC_MPI *)pc->data;
626:   PetscMPIInt  rank, size;
627:   PCMPICommand request;
628:   PetscBool    newmatrix = PETSC_FALSE;

630:   PetscFunctionBegin;
631:   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
632:   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
633:   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

635:   if (!pc->setupcalled) {
636:     if (!km->alwaysuseserver) {
637:       PetscInt n;
638:       Mat      sA;
639:       /* short circuit for small systems */
640:       PetscCall(PCGetOperators(pc, &sA, &sA));
641:       PetscCall(MatGetSize(sA, &n, NULL));
642:       if (n < 2 * km->mincntperrank - 1 || size == 1) {
643:         pc->ops->setup   = NULL;
644:         pc->ops->apply   = PCApply_Seq;
645:         pc->ops->destroy = PCDestroy_Seq;
646:         pc->ops->view    = PCView_Seq;
647:         PetscCall(PCSetUp_Seq(pc));
648:         PetscFunctionReturn(PETSC_SUCCESS);
649:       }
650:     }
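    /*
       With the default mincntperrank of 10000, the short circuit above keeps any system with fewer
       than 2 * 10000 - 1 = 19999 rows (or any single-rank run) on rank 0 using the PCXXX_Seq routines;
       only larger systems pay the cost of the server protocol that follows.
    */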

652:     request = PCMPI_CREATE;
653:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
654:     PetscCall(PCMPICreate(pc));
655:     newmatrix = PETSC_TRUE;
656:   }
657:   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

659:   if (newmatrix) {
660:     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
661:     request = PCMPI_SET_MAT;
662:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
663:     PetscCall(PCMPISetMat(pc));
664:   } else {
665:     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
666:     request = PCMPI_UPDATE_MAT_VALUES;
667:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
668:     PetscCall(PCMPIUpdateMatValues(pc));
669:   }
670:   PetscFunctionReturn(PETSC_SUCCESS);
671: }

673: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
674: {
675:   PCMPICommand request = PCMPI_SOLVE;

677:   PetscFunctionBegin;
678:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
679:   PetscCall(PCMPISolve(pc, b, x));
680:   PetscFunctionReturn(PETSC_SUCCESS);
681: }

683: static PetscErrorCode PCDestroy_MPI(PC pc)
684: {
685:   PCMPICommand request = PCMPI_DESTROY;

687:   PetscFunctionBegin;
688:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
689:   PetscCall(PCMPIDestroy(pc));
690:   PetscCall(PetscFree(pc->data));
691:   PetscFunctionReturn(PETSC_SUCCESS);
692: }

694: /*
695:      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
696: */
697: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
698: {
699:   PC_MPI     *km = (PC_MPI *)pc->data;
700:   MPI_Comm    comm;
701:   PetscMPIInt size;

703:   PetscFunctionBegin;
704:   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
705:   PetscCallMPI(MPI_Comm_size(comm, &size));
706:   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
707:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
708:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
709:   PetscFunctionReturn(PETSC_SUCCESS);
710: }

712: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
713: {
714:   PC_MPI *km = (PC_MPI *)pc->data;

716:   PetscFunctionBegin;
717:   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
718:   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
719:   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
720:   PetscOptionsHeadEnd();
721:   PetscFunctionReturn(PETSC_SUCCESS);
722: }

724: /*MC
725:      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

727:    Options Database Keys for the Server:
728: +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
729: -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

731:    Options Database Keys for a specific `KSP` object:
732: +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
733: -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

735:    Level: developer

737:    Notes:
738:    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

740:    It can be particularly useful for user OpenMP code or potentially user GPU code.

742:    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
743:    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

745:    The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
746:    because they are not the actual solver objects.

748:    When `-log_view` is used with this solver, the events within the parallel solve are logged in their own stage. Some of the logging in the other
749:    stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentage of time in the events will be misleading.

751:    Developer Note:
752:    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
753:    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

755: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
756: M*/
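/*
   Example invocation (hypothetical executable name): an otherwise sequential PETSc application
   built as ./app might be run as

       mpiexec -n 8 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view \
               -ksp_type cg -pc_type gamg -mpi_linear_solver_server_minimum_count_per_rank 5000

   so that user code runs only on rank 0, each linear solve is performed by a CG/GAMG solver on a
   subcommunicator of up to 8 ranks, and the server statistics are printed during PetscFinalize().
*/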
757: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
758: {
759:   PC_MPI *km;
760:   char   *found = NULL;

762:   PetscFunctionBegin;
763:   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
764:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

766:   /* material from PCSetType() */
767:   PetscTryTypeMethod(pc, destroy);
768:   pc->ops->destroy = NULL;
769:   pc->data         = NULL;

771:   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
772:   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
773:   pc->modifysubmatrices  = NULL;
774:   pc->modifysubmatricesP = NULL;
775:   pc->setupcalled        = 0;

777:   PetscCall(PetscNew(&km));
778:   pc->data = (void *)km;

780:   km->mincntperrank = 10000;

782:   pc->ops->setup          = PCSetUp_MPI;
783:   pc->ops->apply          = PCApply_MPI;
784:   pc->ops->destroy        = PCDestroy_MPI;
785:   pc->ops->view           = PCView_MPI;
786:   pc->ops->setfromoptions = PCSetFromOptions_MPI;
787:   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
788:   PetscFunctionReturn(PETSC_SUCCESS);
789: }