Actual source code: pcmpi.c

  1: /*
  2:     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
  3:     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

  5:     The calling non-MPI program may use OpenMP to compute the right-hand side and matrix for the linear system.

  7:     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

  9:     The resulting KSP and PC can only be controlled via the options database, though some common commands
 10:     could be passed through the server.

 12: */
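/*
    A rough sketch of the control flow implemented below (a summary of this file, not additional API): MPI rank 0
    runs the user's sequential code; inside PCSetUp_MPI() and PCApply_MPI() it broadcasts a PCMPICommand with
    PCMPIServerBroadcastRequest() and then calls the matching PCMPIXxx(pc) routine, while the other ranks wait in
    the PCMPIServerBegin() loop and dispatch to the same routine with a NULL argument, so the participating ranks
    take part in the collective scatters/gathers of the matrix, right-hand side, and solution.
*/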
 13: #include <petsc/private/pcimpl.h>
 14: #include <petsc/private/kspimpl.h>
 15: #include <petscts.h>
 16: #include <petsctao.h>
 17: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
 18:   #include <pthread.h>
 19: #endif

 21: #define PC_MPI_MAX_RANKS  256
 22: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD

 24: typedef struct {
 25:   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
 26:   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
 27:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
 28:   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
 29:   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
 30: } PC_MPI;

 32: typedef enum {
 33:   PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
 34:   PCMPI_CREATE,
 35:   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
 36:   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
 37:   PCMPI_SOLVE,
 38:   PCMPI_VIEW,
 39:   PCMPI_DESTROY /* destroy a PC that is no longer needed */
 40: } PCMPICommand;

 42: static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
 43: static PetscBool     PCMPICommSet = PETSC_FALSE;
 44: static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
 45: static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
 46: static PetscLogEvent EventServerDist, EventServerDistMPI;
 47: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
 48: static pthread_mutex_t *PCMPIServerLocks;
 49: #else
 50: static void *PCMPIServerLocks;
 51: #endif

 53: static PetscErrorCode PCMPICommsCreate(void)
 54: {
 55:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 56:   PetscMPIInt size, rank, i;

 58:   PetscFunctionBegin;
 59:   PetscCallMPI(MPI_Comm_size(comm, &size));
 60:   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
 61:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 62:   /* comm for size 1 is useful only for debugging */
 63:   for (i = 0; i < size; i++) {
 64:     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
 65:     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
 66:     PCMPISolveCounts[i] = 0;
 67:     PCMPIKSPCounts[i]   = 0;
 68:     PCMPIIterations[i]  = 0;
 69:     PCMPISizes[i]       = 0;
 70:   }
 71:   PCMPICommSet = PETSC_TRUE;
 72:   PetscFunctionReturn(PETSC_SUCCESS);
 73: }

 75: static PetscErrorCode PCMPICommsDestroy(void)
 76: {
 77:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 78:   PetscMPIInt size, rank, i;

 80:   PetscFunctionBegin;
 81:   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
 82:   PetscCallMPI(MPI_Comm_size(comm, &size));
 83:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 84:   for (i = 0; i < size; i++) {
 85:     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
 86:   }
 87:   PCMPICommSet = PETSC_FALSE;
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: static PetscErrorCode PCMPICreate(PC pc)
 92: {
 93:   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
 94:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 95:   KSP         ksp;
 96:   PetscInt    N[2], mincntperrank = 0;
 97:   PetscMPIInt size;
 98:   Mat         sA;
 99:   char       *cprefix = NULL;
100:   PetscMPIInt len     = 0;

102:   PetscFunctionBegin;
103:   PCMPIServerInSolve = PETSC_TRUE;
104:   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105:   PetscCallMPI(MPI_Comm_size(comm, &size));
106:   if (pc) {
107:     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a single MPI rank; this will be less efficient than not using this type\n"));
108:     PetscCall(PCGetOperators(pc, &sA, &sA));
109:     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110:   }
111:   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

113:   /* choose a suitably sized MPI_Comm for the problem to be solved on */
114:   if (km) mincntperrank = km->mincntperrank;
115:   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116:   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117:   if (comm == MPI_COMM_NULL) {
118:     ksp                = NULL;
119:     PCMPIServerInSolve = PETSC_FALSE;
120:     PetscFunctionReturn(PETSC_SUCCESS);
121:   }
122:   PetscCall(PetscLogStagePush(PCMPIStage));
123:   PetscCall(KSPCreate(comm, &ksp));
124:   PetscCall(KSPSetNestLevel(ksp, 1));
125:   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126:   PetscCall(PetscLogStagePop());
127:   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128:   if (pc) {
129:     size_t      slen;
130:     const char *prefix = NULL;
131:     char       *found  = NULL;

133:     PetscCallMPI(MPI_Comm_size(comm, &size));
134:     PCMPIKSPCounts[size - 1]++;
135:     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
136:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
137:     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138:     PetscCall(PetscStrallocpy(prefix, &cprefix));
139:     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140:     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141:     *found = 0;
142:     PetscCall(PetscStrlen(cprefix, &slen));
143:     PetscCall(PetscMPIIntCast(slen, &len));
144:   }
145:   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146:   if (len) {
147:     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148:     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149:     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150:   }
151:   PetscCall(PetscFree(cprefix));
152:   PCMPIServerInSolve = PETSC_FALSE;
153:   PetscFunctionReturn(PETSC_SUCCESS);
154: }

156: static PetscErrorCode PCMPISetMat(PC pc)
157: {
158:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159:   Mat                A;
160:   PetscInt           m, n, j, bs;
161:   Mat                sA;
162:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163:   KSP                ksp;
164:   PetscLayout        layout;
165:   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166:   const PetscInt    *range;
167:   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168:   const PetscScalar *a                = NULL, *sa;
169:   PetscInt           matproperties[8] = {0}, rstart, rend;
170:   char              *cprefix;

172:   PetscFunctionBegin;
173:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
174:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175:   PCMPIServerInSolve = PETSC_TRUE;
176:   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178:   if (pc) {
179:     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
180:     const char *prefix;
181:     size_t      clen;

183:     PetscCallMPI(MPI_Comm_size(comm, &size));
184:     PCMPIMatCounts[size - 1]++;
185:     PetscCall(PCGetOperators(pc, &sA, &sA));
186:     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187:     PetscCall(MatGetBlockSize(sA, &bs));
188:     matproperties[2] = bs;
189:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
190:     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
191:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
192:     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
193:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
194:     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
195:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
196:     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
197:     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
198:     PetscCall(MatGetOptionsPrefix(sA, &prefix));
199:     PetscCall(PetscStrallocpy(prefix, &cprefix));
200:     PetscCall(PetscStrlen(cprefix, &clen));
201:     matproperties[7] = (PetscInt)clen;
202:   }
203:   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

205:   /* determine ownership ranges of matrix columns */
206:   PetscCall(PetscLayoutCreate(comm, &layout));
207:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
208:   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209:   PetscCall(PetscLayoutSetUp(layout));
210:   PetscCall(PetscLayoutGetLocalSize(layout, &n));
211:   PetscCall(PetscLayoutDestroy(&layout));

213:   /* determine ownership ranges of matrix rows */
214:   PetscCall(PetscLayoutCreate(comm, &layout));
215:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
216:   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
217:   PetscCall(PetscLayoutSetUp(layout));
218:   PetscCall(PetscLayoutGetLocalSize(layout, &m));
219:   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

221:   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222:   /* copy over the matrix nonzero structure and values */
223:   if (pc) {
224:     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225:     if (!PCMPIServerUseShmget) {
226:       NZ      = km->NZ;
227:       NZdispl = km->NZdispl;
228:       PetscCall(PetscLayoutGetRanges(layout, &range));
229:       for (i = 0; i < size; i++) {
230:         PetscCall(PetscMPIIntCast(1 + range[i + 1] - range[i], &sendcounti[i]));
231:         PetscCall(PetscMPIIntCast(IA[range[i + 1]] - IA[range[i]], &NZ[i]));
232:       }
233:       displi[0]  = 0;
234:       NZdispl[0] = 0;
235:       for (j = 1; j < size; j++) {
236:         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237:         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238:       }
239:     }
240:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241:   }
242:   PetscCall(PetscLayoutDestroy(&layout));

244:   PetscCall(MatCreate(comm, &A));
245:   if (matproperties[7] > 0) {
246:     PetscMPIInt ni;

248:     PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
249:     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
250:     PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
251:     PetscCall(MatSetOptionsPrefix(A, cprefix));
252:     PetscCall(PetscFree(cprefix));
253:   }
254:   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
255:   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
256:   PetscCall(MatSetType(A, MATMPIAIJ));

258:   if (!PCMPIServerUseShmget) {
259:     PetscMPIInt in;
260:     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
261:     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
262:     PetscCall(PetscMPIIntCast(n, &in));
263:     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
264:     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
265:     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
266:   } else {
267:     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
268:     PCMPIServerAddresses *addresses;

270:     PetscCall(PetscNew(&addresses));
271:     addresses->n = 3;
272:     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
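    /* the mapped addresses are rank 0's full CSR arrays in shared memory: offset the row pointers by this rank's
       first row (rstart) and the column indices and values by the first nonzero of that row (ia[0]) */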
273:     ia = rstart + (PetscInt *)addresses->addr[0];
274:     ja = ia[0] + (PetscInt *)addresses->addr[1];
275:     a  = ia[0] + (PetscScalar *)addresses->addr[2];
276:     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
277:   }

279:   if (pc) {
280:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
281:     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
282:   }
283:   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

285:   PetscCall(PetscLogStagePush(PCMPIStage));
286:   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
287:   PetscCall(MatSetBlockSize(A, matproperties[2]));

289:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
290:   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
291:   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
292:   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

294:   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
295:   PetscCall(KSPSetOperators(ksp, A, A));
296:   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
297:   PetscCall(PetscLogStagePop());
298:   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
299:     const PetscInt *range;

301:     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
302:     for (i = 0; i < size; i++) {
303:       PetscCall(PetscMPIIntCast(range[i + 1] - range[i], &km->sendcount[i]));
304:       PetscCall(PetscMPIIntCast(range[i], &km->displ[i]));
305:     }
306:   }
307:   PetscCall(MatDestroy(&A));
308:   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
309:   PetscCall(KSPSetFromOptions(ksp));
310:   PCMPIServerInSolve = PETSC_FALSE;
311:   PetscFunctionReturn(PETSC_SUCCESS);
312: }

314: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
315: {
316:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
317:   KSP                ksp;
318:   Mat                sA, A;
319:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
320:   const PetscInt    *ia, *IA;
321:   const PetscScalar *a;
322:   PetscCount         nz;
323:   const PetscScalar *sa = NULL;
324:   PetscMPIInt        size;
325:   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

327:   PetscFunctionBegin;
328:   if (pc) {
329:     PetscCall(PCGetOperators(pc, &sA, &sA));
330:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
331:     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
332:   }
333:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
334:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
335:   PCMPIServerInSolve = PETSC_TRUE;
336:   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
337:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
338:   PetscCallMPI(MPI_Comm_size(comm, &size));
339:   PCMPIMatCounts[size - 1]++;
340:   PetscCall(KSPGetOperators(ksp, NULL, &A));
341:   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
342:   if (!PCMPIServerUseShmget) {
343:     PetscMPIInt mpi_nz;

345:     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
346:     PetscCall(PetscMPIIntCast(nz, &mpi_nz));
347:     PetscCall(PetscMalloc1(nz, &a));
348:     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
349:   } else {
350:     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
351:     PCMPIServerAddresses *addresses;
352:     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
353:     ia = rstart + (PetscInt *)addresses->addr[0];
354:     a  = ia[0] + (PetscScalar *)addresses->addr[2];
355:   }
356:   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
357:   if (pc) {
358:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

360:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
361:     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

363:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
364:     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
365:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
366:     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
367:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
368:     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
369:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
370:     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
371:   }
372:   PetscCall(MatUpdateMPIAIJWithArray(A, a));
373:   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
374:   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
375:   /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
376:   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
377:   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
378:   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
379:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
380:   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
381:   PCMPIServerInSolve = PETSC_FALSE;
382:   PetscFunctionReturn(PETSC_SUCCESS);
383: }

385: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
386: {
387:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
388:   KSP                ksp;
389:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
390:   const PetscScalar *sb   = NULL, *x;
391:   PetscScalar       *b, *sx = NULL;
392:   PetscInt           its, n;
393:   PetscMPIInt        size;
394:   void              *addr[2];

396:   PetscFunctionBegin;
397:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
398:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
399:   PCMPIServerInSolve = PETSC_TRUE;
400:   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
401:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

403:   /* scatterv rhs */
404:   PetscCallMPI(MPI_Comm_size(comm, &size));
405:   if (pc) {
406:     PetscInt N;

408:     PCMPISolveCounts[size - 1]++;
409:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
410:     PCMPISizes[size - 1] += N;
411:   }
412:   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
413:   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
414:   if (!PCMPIServerUseShmget) {
415:     PetscMPIInt in;

417:     PetscCall(VecGetArray(ksp->vec_rhs, &b));
418:     if (pc) PetscCall(VecGetArrayRead(B, &sb));
419:     PetscCall(PetscMPIIntCast(n, &in));
420:     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
421:     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
422:     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
423:     // TODO: scatter initial guess if needed
424:   } else {
425:     PetscInt rstart;

427:     if (pc) PetscCall(VecGetArrayRead(B, &sb));
428:     if (pc) PetscCall(VecGetArray(X, &sx));
429:     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
430:     if (pc) PetscCall(VecRestoreArray(X, &sx));
431:     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

433:     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
434:     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
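    /* place the user's B and X arrays (mapped through shared memory) directly under the solver's rhs and
       solution vectors, offset by this rank's first row, so no copies of the vector entries are needed */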
435:     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
436:     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
437:   }
438:   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

440:   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
441:   PetscCall(PetscLogStagePush(PCMPIStage));
442:   PetscCall(KSPSolve(ksp, NULL, NULL));
443:   PetscCall(PetscLogStagePop());
444:   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
445:   PetscCall(KSPGetIterationNumber(ksp, &its));
446:   PCMPIIterations[size - 1] += its;
447:   // TODO: send iterations up to outer KSP

449:   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

451:   /* gather solution */
452:   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
453:   if (!PCMPIServerUseShmget) {
454:     PetscMPIInt in;

456:     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
457:     if (pc) PetscCall(VecGetArray(X, &sx));
458:     PetscCall(PetscMPIIntCast(n, &in));
459:     PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
460:     if (pc) PetscCall(VecRestoreArray(X, &sx));
461:     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
462:   } else {
463:     PetscCall(VecResetArray(ksp->vec_rhs));
464:     PetscCall(VecResetArray(ksp->vec_sol));
465:   }
466:   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
467:   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
468:   PCMPIServerInSolve = PETSC_FALSE;
469:   PetscFunctionReturn(PETSC_SUCCESS);
470: }

472: static PetscErrorCode PCMPIDestroy(PC pc)
473: {
474:   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
475:   KSP      ksp;
476:   MPI_Comm comm = PC_MPI_COMM_WORLD;

478:   PetscFunctionBegin;
479:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
480:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
481:   PetscCall(PetscLogStagePush(PCMPIStage));
482:   PCMPIServerInSolve = PETSC_TRUE;
483:   PetscCall(KSPDestroy(&ksp));
484:   PetscCall(PetscLogStagePop());
485:   PCMPIServerInSolve = PETSC_FALSE;
486:   PetscFunctionReturn(PETSC_SUCCESS);
487: }

489: static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
490: {
491: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
492:   PetscMPIInt dummy1 = 1, dummy2;
493: #endif

495:   PetscFunctionBegin;
496: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
497:   if (PCMPIServerUseShmget) {
498:     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
499:   }
500: #endif
501:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
502:   /* next line ensures the receiving server ranks have taken their locks before rank 0 takes them back */
503: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
504:   if (PCMPIServerUseShmget) {
505:     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
506:     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
507:   }
508: #endif
509:   PetscFunctionReturn(PETSC_SUCCESS);
510: }

512: /*@C
513:   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
514:   parallel `KSP` solves and management of parallel `KSP` objects.

516:   Logically Collective on all MPI processes except rank 0

518:   Options Database Keys:
519: + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
520: . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
521: - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

523:   Level: developer

525:   Note:
526:   This is normally started automatically in `PetscInitialize()` when the option is provided

528:   See `PCMPI` for information on using the solver with a `KSP` object
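
  Example Usage:
  A minimal sketch of launching an application in server mode (the executable name and rank count are
  illustrative); only rank 0 runs the user code while the remaining ranks enter the server loop
.vb
  mpiexec -n 4 ./app -mpi_linear_solver_server
.ve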

530:   Developer Notes:
531:   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
532:   written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
533:   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

535:   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

537:   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

539:   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

541:   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
542:   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

544:   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao`, where `SNESSolve()`, for example, runs on
545:   all MPI processes but the user callback only runs on one MPI process per node.

547:   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
548:   the `MPI_Comm` argument from PETSc calls.

550: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
551: @*/
552: PetscErrorCode PCMPIServerBegin(void)
553: {
554:   PetscMPIInt rank;

556:   PetscFunctionBegin;
557:   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
558:   if (PetscDefined(USE_SINGLE_LIBRARY)) {
559:     PetscCall(VecInitializePackage());
560:     PetscCall(MatInitializePackage());
561:     PetscCall(DMInitializePackage());
562:     PetscCall(PCInitializePackage());
563:     PetscCall(KSPInitializePackage());
564:     PetscCall(SNESInitializePackage());
565:     PetscCall(TSInitializePackage());
566:     PetscCall(TaoInitializePackage());
567:   }
568:   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
569:   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
570:   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

572:   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
573:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

575:   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
576:   if (PCMPIServerUseShmget) {
577: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
578:     PetscMPIInt size;

580:     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
581:     if (size > 1) {
582:       pthread_mutex_t *locks;

584:       if (rank == 0) {
585:         PCMPIServerActive = PETSC_TRUE;
586:         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
587:       }
588:       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
589:       if (rank == 0) {
590:         pthread_mutexattr_t attr;

592:         pthread_mutexattr_init(&attr);
593:         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

595:         for (int i = 1; i < size; i++) {
596:           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
597:           pthread_mutex_lock(&PCMPIServerLocks[i]);
598:         }
599:       }
600:       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
601:     }
602: #endif
603:   }
604:   if (rank == 0) {
605:     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
606:     PCMPIServerActive = PETSC_TRUE;
607:     PetscFunctionReturn(PETSC_SUCCESS);
608:   }

610:   while (PETSC_TRUE) {
611:     PCMPICommand request = PCMPI_CREATE;
612: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
613:     PetscMPIInt dummy1 = 1, dummy2;
614: #endif

616:     // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
617: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
618:     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
619: #endif
620:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
621: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
622:     if (PCMPIServerUseShmget) {
623:       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
624:       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
625:       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
626:     }
627: #endif
628:     switch (request) {
629:     case PCMPI_CREATE:
630:       PetscCall(PCMPICreate(NULL));
631:       break;
632:     case PCMPI_SET_MAT:
633:       PetscCall(PCMPISetMat(NULL));
634:       break;
635:     case PCMPI_UPDATE_MAT_VALUES:
636:       PetscCall(PCMPIUpdateMatValues(NULL));
637:       break;
638:     case PCMPI_VIEW:
639:       // PetscCall(PCMPIView(NULL));
640:       break;
641:     case PCMPI_SOLVE:
642:       PetscCall(PCMPISolve(NULL, NULL, NULL));
643:       break;
644:     case PCMPI_DESTROY:
645:       PetscCall(PCMPIDestroy(NULL));
646:       break;
647:     case PCMPI_EXIT:
648:       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
649:       PetscCall(PetscFinalize());
650:       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
651:       break;
652:     default:
653:       break;
654:     }
655:   }
656:   PetscFunctionReturn(PETSC_SUCCESS);
657: }

659: /*@C
660:   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
661:   parallel `KSP` solves and management of parallel `KSP` objects.

663:   Logically Collective on all MPI ranks except 0

665:   Level: developer

667:   Note:
668:   This is normally called automatically in `PetscFinalize()`

670: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
671: @*/
672: PetscErrorCode PCMPIServerEnd(void)
673: {
674:   PetscFunctionBegin;
675:   if (PetscGlobalRank == 0) {
676:     PetscViewer       viewer = NULL;
677:     PetscViewerFormat format;

679:     PetscCall(PetscShmgetAddressesFinalize());
680:     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
681:     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
682:     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
683:     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
684:     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
685:     PetscOptionsEnd();
686:     if (viewer) {
687:       PetscBool isascii;

689:       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
690:       if (isascii) {
691:         PetscMPIInt size;
692:         PetscMPIInt i;

694:         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
695:         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
696:         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
697:         if (PCMPIKSPCountsSeq) {
698:           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
699:         }
700:         for (i = 0; i < size; i++) {
701:           if (PCMPIKSPCounts[i]) {
702:             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
703:           }
704:         }
705:         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
706:       }
707:       PetscCall(PetscViewerDestroy(&viewer));
708:     }
709:   }
710:   PetscCall(PCMPICommsDestroy());
711:   PCMPIServerActive = PETSC_FALSE;
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: /*
716:     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
717:     because, for example, the problem is small. This version is more efficient because it does not require copying any data
718: */
719: static PetscErrorCode PCSetUp_Seq(PC pc)
720: {
721:   PC_MPI     *km = (PC_MPI *)pc->data;
722:   Mat         sA;
723:   const char *prefix;
724:   char       *found = NULL, *cprefix;

726:   PetscFunctionBegin;
727:   PCMPIServerInSolve = PETSC_TRUE;
728:   PetscCall(PCGetOperators(pc, NULL, &sA));
729:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
730:   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
731:   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
732:   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

734:   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
735:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
736:   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
737:   PetscCall(PetscStrallocpy(prefix, &cprefix));
738:   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
739:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
740:   *found = 0;
741:   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
742:   PetscCall(PetscFree(cprefix));

744:   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
745:   PetscCall(KSPSetFromOptions(km->ksps[0]));
746:   PetscCall(KSPSetUp(km->ksps[0]));
747:   PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
748:   PCMPIKSPCountsSeq++;
749:   PCMPIServerInSolve = PETSC_FALSE;
750:   PetscFunctionReturn(PETSC_SUCCESS);
751: }

753: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
754: {
755:   PC_MPI  *km = (PC_MPI *)pc->data;
756:   PetscInt its, n;
757:   Mat      A;

759:   PetscFunctionBegin;
760:   PCMPIServerInSolve = PETSC_TRUE;
761:   PetscCall(KSPSolve(km->ksps[0], b, x));
762:   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
763:   PCMPISolveCountsSeq++;
764:   PCMPIIterationsSeq += its;
765:   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
766:   PetscCall(MatGetSize(A, &n, NULL));
767:   PCMPISizesSeq += n;
768:   PCMPIServerInSolve = PETSC_FALSE;
769:   /*
770:     do not keep a reference to the previous rhs and solution since destroying them in the next KSPSolve()
771:     may use PetscFree() instead of PCMPIArrayDeallocate()
772:   */
773:   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
774:   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
775:   PetscFunctionReturn(PETSC_SUCCESS);
776: }

778: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
779: {
780:   PC_MPI *km = (PC_MPI *)pc->data;

782:   PetscFunctionBegin;
783:   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
784:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
785:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
786:   PetscFunctionReturn(PETSC_SUCCESS);
787: }

789: static PetscErrorCode PCDestroy_Seq(PC pc)
790: {
791:   PC_MPI *km = (PC_MPI *)pc->data;
792:   Mat     A, B;
793:   Vec     x, b;

795:   PetscFunctionBegin;
796:   PCMPIServerInSolve = PETSC_TRUE;
797:   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
798:   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
799:   PetscCall(PetscObjectReference((PetscObject)A));
800:   PetscCall(PetscObjectReference((PetscObject)B));
801:   PetscCall(KSPGetSolution(km->ksps[0], &x));
802:   PetscCall(PetscObjectReference((PetscObject)x));
803:   PetscCall(KSPGetRhs(km->ksps[0], &b));
804:   PetscCall(PetscObjectReference((PetscObject)b));
805:   PetscCall(KSPDestroy(&km->ksps[0]));
806:   PetscCall(PetscFree(pc->data));
807:   PCMPIServerInSolve = PETSC_FALSE;
808:   PetscCall(MatDestroy(&A));
809:   PetscCall(MatDestroy(&B));
810:   PetscCall(VecDestroy(&x));
811:   PetscCall(VecDestroy(&b));
812:   PetscFunctionReturn(PETSC_SUCCESS);
813: }

815: /*
816:      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
817:      right-hand side to the parallel PC
818: */
819: static PetscErrorCode PCSetUp_MPI(PC pc)
820: {
821:   PC_MPI     *km = (PC_MPI *)pc->data;
822:   PetscMPIInt rank, size;
823:   PetscBool   newmatrix = PETSC_FALSE;

825:   PetscFunctionBegin;
826:   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
827:   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
828:   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

830:   if (!pc->setupcalled) {
831:     if (!km->alwaysuseserver) {
832:       PetscInt n;
833:       Mat      sA;
834:       /* short circuit for small systems */
835:       PetscCall(PCGetOperators(pc, &sA, &sA));
836:       PetscCall(MatGetSize(sA, &n, NULL));
837:       if (n < 2 * km->mincntperrank - 1 || size == 1) {
838:         pc->ops->setup   = NULL;
839:         pc->ops->apply   = PCApply_Seq;
840:         pc->ops->destroy = PCDestroy_Seq;
841:         pc->ops->view    = PCView_Seq;
842:         PetscCall(PCSetUp_Seq(pc));
843:         PetscFunctionReturn(PETSC_SUCCESS);
844:       }
845:     }

847:     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
848:     PetscCall(PCMPICreate(pc));
849:     newmatrix = PETSC_TRUE;
850:   }
851:   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

853:   if (newmatrix) {
854:     PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
855:     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
856:     PetscCall(PCMPISetMat(pc));
857:   } else {
858:     PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
859:     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
860:     PetscCall(PCMPIUpdateMatValues(pc));
861:   }
862:   PetscFunctionReturn(PETSC_SUCCESS);
863: }

865: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
866: {
867:   PetscFunctionBegin;
868:   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
869:   PetscCall(PCMPISolve(pc, b, x));
870:   PetscFunctionReturn(PETSC_SUCCESS);
871: }

873: static PetscErrorCode PCDestroy_MPI(PC pc)
874: {
875:   PetscFunctionBegin;
876:   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
877:   PetscCall(PCMPIDestroy(pc));
878:   PetscCall(PetscFree(pc->data));
879:   PetscFunctionReturn(PETSC_SUCCESS);
880: }

882: /*
883:      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer; use the options database instead
884: */
885: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
886: {
887:   PC_MPI     *km = (PC_MPI *)pc->data;
888:   MPI_Comm    comm;
889:   PetscMPIInt size;

891:   PetscFunctionBegin;
892:   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
893:   PetscCallMPI(MPI_Comm_size(comm, &size));
894:   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
895:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
896:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
897:   PetscFunctionReturn(PETSC_SUCCESS);
898: }

900: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
901: {
902:   PC_MPI *km = (PC_MPI *)pc->data;

904:   PetscFunctionBegin;
905:   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
906:   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
907:   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
908:   PetscOptionsHeadEnd();
909:   PetscFunctionReturn(PETSC_SUCCESS);
910: }

912: /*MC
913:      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

915:    Options Database Keys for the Server:
916: +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
917: .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
918: -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side, defaults to true

920:    Options Database Keys for a specific `KSP` object
921: +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
922: -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

924:    Level: developer

926:    Notes:
927:    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
928:    `MatCreateSeqAIJWithArrays()`

930:    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

932:    It can be particularly useful for user OpenMP code or potentially user GPU code.

934:    When the program is run with a single MPI process, it directly uses the provided matrix and right-hand side
935:    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

937:    The options for the `KSP` and `PC` that perform the actual solve must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
938:    because they are not the actual solver objects.

940:    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
941:    stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time in the events will be misleading.
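
   Example Usage:
   A sketch of a typical run (the executable name, rank count, and solver options are illustrative); with no prefix
   on the original `KSP`, the usual solver options control the inner MPI parallel solve
.vb
   mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type cg -pc_type gamg -mpi_linear_solver_server_view
.ve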

943:    Developer Note:
944:    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
945:    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

947: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
948: M*/
949: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
950: {
951:   PC_MPI *km;
952:   char   *found = NULL;

954:   PetscFunctionBegin;
955:   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
956:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

958:   /* material from PCSetType() */
959:   PetscTryTypeMethod(pc, destroy);
960:   pc->ops->destroy = NULL;
961:   pc->data         = NULL;

963:   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
964:   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
965:   pc->modifysubmatrices  = NULL;
966:   pc->modifysubmatricesP = NULL;
967:   pc->setupcalled        = 0;

969:   PetscCall(PetscNew(&km));
970:   pc->data = (void *)km;

972:   km->mincntperrank = 10000;

974:   pc->ops->setup          = PCSetUp_MPI;
975:   pc->ops->apply          = PCApply_MPI;
976:   pc->ops->destroy        = PCDestroy_MPI;
977:   pc->ops->view           = PCView_MPI;
978:   pc->ops->setfromoptions = PCSetFromOptions_MPI;
979:   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
980:   PetscFunctionReturn(PETSC_SUCCESS);
981: }

983: /*@
984:   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

986:   Not Collective

988:   Input Parameter:
989: . pc - the preconditioner context

991:   Output Parameter:
992: . innerksp - the inner `KSP`

994:   Level: advanced
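
  Example Usage:
  A minimal sketch; `pc` is assumed to already be of type `PCMPI`
.vb
  KSP inner;

  PetscCall(PCMPIGetKSP(pc, &inner));
.ve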

996: .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
997: @*/
998: PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
999: {
1000:   PC_MPI *red = (PC_MPI *)pc->data;

1002:   PetscFunctionBegin;
1004:   PetscAssertPointer(innerksp, 2);
1005:   *innerksp = red->ksps[0];
1006:   PetscFunctionReturn(PETSC_SUCCESS);
1007: }