Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5: The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.
7: The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
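/*
   A brief sketch (comments only, not part of the compiled code) of the division of labor, assuming
   the option -mpi_linear_solver_server was passed so that PetscInitialize() started the server:

     rank 0:        runs the user's sequential-looking program; the PCMPI routines below broadcast a
                    PCMPICommand and then call the matching PCMPIXxx(pc) routine with the real PC
     ranks 1..n-1:  wait in the loop inside PCMPIServerBegin(), receive each PCMPICommand, and call
                    the matching PCMPIXxx(NULL) routine so that the collective MPI calls match up
*/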
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petscts.h>
16: #include <petsctao.h>
17: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18: #include <pthread.h>
19: #endif
21: #define PC_MPI_MAX_RANKS 256
22: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
24: typedef struct {
25: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27: PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
28: PetscInt mincntperrank; /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
29: PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30: } PC_MPI;
32: typedef enum {
33: PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
34: PCMPI_CREATE,
35: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
36: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37: PCMPI_SOLVE,
38: PCMPI_VIEW,
39: PCMPI_DESTROY /* destroy a PC that is no longer needed */
40: } PCMPICommand;
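/*
   Each command is processed collectively: rank 0 broadcasts the command and then calls the matching
   routine with its PC, while the server ranks (inside PCMPIServerBegin()) receive the broadcast and
   call the same routine with a NULL PC. A minimal sketch of the rank-0 side, mirroring PCSetUp_MPI():

     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
     PetscCall(PCMPISetMat(pc));
*/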
42: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
43: static PetscBool PCMPICommSet = PETSC_FALSE;
44: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
45: static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
46: static PetscLogEvent EventServerDist, EventServerDistMPI;
47: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
48: static pthread_mutex_t *PCMPIServerLocks;
49: #else
50: static void *PCMPIServerLocks;
51: #endif
53: static PetscErrorCode PCMPICommsCreate(void)
54: {
55: MPI_Comm comm = PC_MPI_COMM_WORLD;
56: PetscMPIInt size, rank, i;
58: PetscFunctionBegin;
59: PetscCallMPI(MPI_Comm_size(comm, &size));
60: PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61: PetscCallMPI(MPI_Comm_rank(comm, &rank));
62: /* comm for size 1 is useful only for debugging */
63: for (i = 0; i < size; i++) {
64: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65: PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66: PCMPISolveCounts[i] = 0;
67: PCMPIKSPCounts[i] = 0;
68: PCMPIIterations[i] = 0;
69: PCMPISizes[i] = 0;
70: }
71: PCMPICommSet = PETSC_TRUE;
72: PetscFunctionReturn(PETSC_SUCCESS);
73: }
75: static PetscErrorCode PCMPICommsDestroy(void)
76: {
77: MPI_Comm comm = PC_MPI_COMM_WORLD;
78: PetscMPIInt size, rank, i;
80: PetscFunctionBegin;
81: if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82: PetscCallMPI(MPI_Comm_size(comm, &size));
83: PetscCallMPI(MPI_Comm_rank(comm, &rank));
84: for (i = 0; i < size; i++) {
85: if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86: }
87: PCMPICommSet = PETSC_FALSE;
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
91: static PetscErrorCode PCMPICreate(PC pc)
92: {
93: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
94: MPI_Comm comm = PC_MPI_COMM_WORLD;
95: KSP ksp;
96: PetscInt N[2], mincntperrank = 0;
97: PetscMPIInt size;
98: Mat sA;
99: char *cprefix = NULL;
100: PetscMPIInt len = 0;
102: PetscFunctionBegin;
103: PCMPIServerInSolve = PETSC_TRUE;
104: if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105: PetscCallMPI(MPI_Comm_size(comm, &size));
106: if (pc) {
107: if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run; this will be less efficient than not using this type\n"));
108: PetscCall(PCGetOperators(pc, &sA, &sA));
109: PetscCall(MatGetSize(sA, &N[0], &N[1]));
110: }
111: PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
113: /* choose a suitably sized MPI_Comm for the problem to be solved on */
114: if (km) mincntperrank = km->mincntperrank;
115: PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117: if (comm == MPI_COMM_NULL) {
118: ksp = NULL;
119: PCMPIServerInSolve = PETSC_FALSE;
120: PetscFunctionReturn(PETSC_SUCCESS);
121: }
122: PetscCall(PetscLogStagePush(PCMPIStage));
123: PetscCall(KSPCreate(comm, &ksp));
124: PetscCall(KSPSetNestLevel(ksp, 1));
125: PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126: PetscCall(PetscLogStagePop());
127: PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128: if (pc) {
129: size_t slen;
130: const char *prefix = NULL;
131: char *found = NULL;
133: PetscCallMPI(MPI_Comm_size(comm, &size));
134: PCMPIKSPCounts[size - 1]++;
135: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
136: PetscCall(PCGetOptionsPrefix(pc, &prefix));
137: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138: PetscCall(PetscStrallocpy(prefix, &cprefix));
139: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141: *found = 0;
142: PetscCall(PetscStrlen(cprefix, &slen));
143: PetscCall(PetscMPIIntCast(slen, &len));
144: }
145: PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146: if (len) {
147: if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148: PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149: PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150: }
151: PetscCall(PetscFree(cprefix));
152: PCMPIServerInSolve = PETSC_FALSE;
153: PetscFunctionReturn(PETSC_SUCCESS);
154: }
156: static PetscErrorCode PCMPISetMat(PC pc)
157: {
158: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
159: Mat A;
160: PetscInt m, n, j, bs;
161: Mat sA;
162: MPI_Comm comm = PC_MPI_COMM_WORLD;
163: KSP ksp;
164: PetscLayout layout;
165: const PetscInt *IA = NULL, *JA = NULL, *ia, *ja;
166: const PetscInt *range;
167: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168: const PetscScalar *a = NULL, *sa;
169: PetscInt matproperties[8] = {0}, rstart, rend;
170: char *cprefix;
172: PetscFunctionBegin;
173: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
174: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175: PCMPIServerInSolve = PETSC_TRUE;
176: PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178: if (pc) {
179: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
180: const char *prefix;
181: size_t clen;
183: PetscCallMPI(MPI_Comm_size(comm, &size));
184: PCMPIMatCounts[size - 1]++;
185: PetscCall(PCGetOperators(pc, &sA, &sA));
186: PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187: PetscCall(MatGetBlockSize(sA, &bs));
188: matproperties[2] = bs;
189: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
190: matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
191: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
192: matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
193: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
194: matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
195: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
196: matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
197: /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
198: PetscCall(MatGetOptionsPrefix(sA, &prefix));
199: PetscCall(PetscStrallocpy(prefix, &cprefix));
200: PetscCall(PetscStrlen(cprefix, &clen));
201: matproperties[7] = (PetscInt)clen;
202: }
203: PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
205: /* determine ownership ranges of matrix columns */
206: PetscCall(PetscLayoutCreate(comm, &layout));
207: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
208: PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209: PetscCall(PetscLayoutSetUp(layout));
210: PetscCall(PetscLayoutGetLocalSize(layout, &n));
211: PetscCall(PetscLayoutDestroy(&layout));
213: /* determine ownership ranges of matrix rows */
214: PetscCall(PetscLayoutCreate(comm, &layout));
215: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
216: PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
217: PetscCall(PetscLayoutSetUp(layout));
218: PetscCall(PetscLayoutGetLocalSize(layout, &m));
219: PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
221: PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222: /* copy over the matrix nonzero structure and values */
223: if (pc) {
224: PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225: if (!PCMPIServerUseShmget) {
226: NZ = km->NZ;
227: NZdispl = km->NZdispl;
228: PetscCall(PetscLayoutGetRanges(layout, &range));
229: for (i = 0; i < size; i++) {
230: PetscCall(PetscMPIIntCast(1 + range[i + 1] - range[i], &sendcounti[i]));
231: PetscCall(PetscMPIIntCast(IA[range[i + 1]] - IA[range[i]], &NZ[i]));
232: }
233: displi[0] = 0;
234: NZdispl[0] = 0;
235: for (j = 1; j < size; j++) {
236: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
237: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238: }
239: }
240: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241: }
242: PetscCall(PetscLayoutDestroy(&layout));
244: PetscCall(MatCreate(comm, &A));
245: if (matproperties[7] > 0) {
246: PetscMPIInt ni;
248: PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
249: if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
250: PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
251: PetscCall(MatSetOptionsPrefix(A, cprefix));
252: PetscCall(PetscFree(cprefix));
253: }
254: PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
255: PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
256: PetscCall(MatSetType(A, MATMPIAIJ));
258: if (!PCMPIServerUseShmget) {
259: PetscMPIInt in;
260: PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
261: PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
262: PetscCall(PetscMPIIntCast(n, &in));
263: PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
264: PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
265: PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
266: } else {
267: const void *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
268: PCMPIServerAddresses *addresses;
270: PetscCall(PetscNew(&addresses));
271: addresses->n = 3;
272: PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
273: ia = rstart + (PetscInt *)addresses->addr[0];
274: ja = ia[0] + (PetscInt *)addresses->addr[1];
275: a = ia[0] + (PetscScalar *)addresses->addr[2];
276: PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
277: }
279: if (pc) {
280: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
281: PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
282: }
283: PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
285: PetscCall(PetscLogStagePush(PCMPIStage));
286: PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
287: PetscCall(MatSetBlockSize(A, matproperties[2]));
289: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
290: if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
291: if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
292: if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
294: if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
295: PetscCall(KSPSetOperators(ksp, A, A));
296: if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
297: PetscCall(PetscLogStagePop());
298: if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
299: const PetscInt *range;
301: PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
302: for (i = 0; i < size; i++) {
303: PetscCall(PetscMPIIntCast(range[i + 1] - range[i], &km->sendcount[i]));
304: PetscCall(PetscMPIIntCast(range[i], &km->displ[i]));
305: }
306: }
307: PetscCall(MatDestroy(&A));
308: PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
309: PetscCall(KSPSetFromOptions(ksp));
310: PCMPIServerInSolve = PETSC_FALSE;
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
315: {
316: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
317: KSP ksp;
318: Mat sA, A;
319: MPI_Comm comm = PC_MPI_COMM_WORLD;
320: const PetscInt *ia, *IA;
321: const PetscScalar *a;
322: PetscCount nz;
323: const PetscScalar *sa = NULL;
324: PetscMPIInt size;
325: PetscInt rstart, matproperties[4] = {0, 0, 0, 0};
327: PetscFunctionBegin;
328: if (pc) {
329: PetscCall(PCGetOperators(pc, &sA, &sA));
330: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
331: PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
332: }
333: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
334: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
335: PCMPIServerInSolve = PETSC_TRUE;
336: PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
337: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
338: PetscCallMPI(MPI_Comm_size(comm, &size));
339: PCMPIMatCounts[size - 1]++;
340: PetscCall(KSPGetOperators(ksp, NULL, &A));
341: PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
342: if (!PCMPIServerUseShmget) {
343: PetscMPIInt mpi_nz;
345: PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
346: PetscCall(PetscMPIIntCast(nz, &mpi_nz));
347: PetscCall(PetscMalloc1(nz, &a));
348: PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
349: } else {
350: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
351: PCMPIServerAddresses *addresses;
352: PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
353: ia = rstart + (PetscInt *)addresses->addr[0];
354: a = ia[0] + (PetscScalar *)addresses->addr[2];
355: }
356: PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
357: if (pc) {
358: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
360: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
361: PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
363: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
364: matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
365: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
366: matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
367: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
368: matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
369: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
370: matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
371: }
372: PetscCall(MatUpdateMPIAIJWithArray(A, a));
373: if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
374: PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
375: /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
376: if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
377: if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
378: if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
379: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
380: PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
381: PCMPIServerInSolve = PETSC_FALSE;
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
386: {
387: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
388: KSP ksp;
389: MPI_Comm comm = PC_MPI_COMM_WORLD;
390: const PetscScalar *sb = NULL, *x;
391: PetscScalar *b, *sx = NULL;
392: PetscInt its, n;
393: PetscMPIInt size;
394: void *addr[2];
396: PetscFunctionBegin;
397: PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
398: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
399: PCMPIServerInSolve = PETSC_TRUE;
400: PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
401: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
403: /* scatterv rhs */
404: PetscCallMPI(MPI_Comm_size(comm, &size));
405: if (pc) {
406: PetscInt N;
408: PCMPISolveCounts[size - 1]++;
409: PetscCall(MatGetSize(pc->pmat, &N, NULL));
410: PCMPISizes[size - 1] += N;
411: }
412: PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
413: PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
414: if (!PCMPIServerUseShmget) {
415: PetscMPIInt in;
417: PetscCall(VecGetArray(ksp->vec_rhs, &b));
418: if (pc) PetscCall(VecGetArrayRead(B, &sb));
419: PetscCall(PetscMPIIntCast(n, &in));
420: PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
421: if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
422: PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
423: // TODO: scatter initial guess if needed
424: } else {
425: PetscInt rstart;
427: if (pc) PetscCall(VecGetArrayRead(B, &sb));
428: if (pc) PetscCall(VecGetArray(X, &sx));
429: const void *inaddr[2] = {(const void **)sb, (const void **)sx};
430: if (pc) PetscCall(VecRestoreArray(X, &sx));
431: if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
433: PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
434: PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
435: PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
436: PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
437: }
438: PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
440: PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
441: PetscCall(PetscLogStagePush(PCMPIStage));
442: PetscCall(KSPSolve(ksp, NULL, NULL));
443: PetscCall(PetscLogStagePop());
444: PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
445: PetscCall(KSPGetIterationNumber(ksp, &its));
446: PCMPIIterations[size - 1] += its;
447: // TODO: send iterations up to outer KSP
449: if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
451: /* gather solution */
452: PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
453: if (!PCMPIServerUseShmget) {
454: PetscMPIInt in;
456: PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
457: if (pc) PetscCall(VecGetArray(X, &sx));
458: PetscCall(PetscMPIIntCast(n, &in));
459: PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
460: if (pc) PetscCall(VecRestoreArray(X, &sx));
461: PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
462: } else {
463: PetscCall(VecResetArray(ksp->vec_rhs));
464: PetscCall(VecResetArray(ksp->vec_sol));
465: }
466: PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
467: PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
468: PCMPIServerInSolve = PETSC_FALSE;
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
472: static PetscErrorCode PCMPIDestroy(PC pc)
473: {
474: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
475: KSP ksp;
476: MPI_Comm comm = PC_MPI_COMM_WORLD;
478: PetscFunctionBegin;
479: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
480: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
481: PetscCall(PetscLogStagePush(PCMPIStage));
482: PCMPIServerInSolve = PETSC_TRUE;
483: PetscCall(KSPDestroy(&ksp));
484: PetscCall(PetscLogStagePop());
485: PCMPIServerInSolve = PETSC_FALSE;
486: PetscFunctionReturn(PETSC_SUCCESS);
487: }
489: static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
490: {
491: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
492: PetscMPIInt dummy1 = 1, dummy2;
493: #endif
495: PetscFunctionBegin;
496: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
497: if (PCMPIServerUseShmget) {
498: for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
499: }
500: #endif
501: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
502: /* next line ensures every server rank has taken its lock before this rank takes the locks back */
503: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
504: if (PCMPIServerUseShmget) {
505: PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
506: for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
507: }
508: #endif
509: PetscFunctionReturn(PETSC_SUCCESS);
510: }
512: /*@C
513: PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
514: parallel `KSP` solves and management of parallel `KSP` objects.
516: Logically Collective on all MPI processes except rank 0
518: Options Database Keys:
519: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
520: . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
521: - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
523: Level: developer
525: Note:
526: This is normally started automatically in `PetscInitialize()` when the option is provided
528: See `PCMPI` for information on using the solver with a `KSP` object
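   Example Usage:
   A sketch of launching an application in server mode; the program name is illustrative and the only
   options shown are the server options documented above.
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve
   Rank 0 then runs the user's main program while the remaining ranks execute the request loop below
   until `PCMPIServerEnd()` broadcasts `PCMPI_EXIT` during `PetscFinalize()`.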
530: Developer Notes:
531: When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
532: written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
533: (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
535: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
537: Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
539: This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
541: The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
542: nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
544: The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
545: all MPI processes but the user callback only runs on one MPI process per node.
547: PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
548: the `MPI_Comm` argument from PETSc calls.
550: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
551: @*/
552: PetscErrorCode PCMPIServerBegin(void)
553: {
554: PetscMPIInt rank;
556: PetscFunctionBegin;
557: PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
558: if (PetscDefined(USE_SINGLE_LIBRARY)) {
559: PetscCall(VecInitializePackage());
560: PetscCall(MatInitializePackage());
561: PetscCall(DMInitializePackage());
562: PetscCall(PCInitializePackage());
563: PetscCall(KSPInitializePackage());
564: PetscCall(SNESInitializePackage());
565: PetscCall(TSInitializePackage());
566: PetscCall(TaoInitializePackage());
567: }
568: PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
569: PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
570: PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
572: if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
573: PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
575: PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
576: if (PCMPIServerUseShmget) {
577: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
578: PetscMPIInt size;
580: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
581: if (size > 1) {
582: pthread_mutex_t *locks;
584: if (rank == 0) {
585: PCMPIServerActive = PETSC_TRUE;
586: PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
587: }
588: PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
589: if (rank == 0) {
590: pthread_mutexattr_t attr;
592: pthread_mutexattr_init(&attr);
593: pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
595: for (int i = 1; i < size; i++) {
596: pthread_mutex_init(&PCMPIServerLocks[i], &attr);
597: pthread_mutex_lock(&PCMPIServerLocks[i]);
598: }
599: }
600: PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
601: }
602: #endif
603: }
604: if (rank == 0) {
605: PETSC_COMM_WORLD = PETSC_COMM_SELF;
606: PCMPIServerActive = PETSC_TRUE;
607: PetscFunctionReturn(PETSC_SUCCESS);
608: }
610: while (PETSC_TRUE) {
611: PCMPICommand request = PCMPI_CREATE;
612: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
613: PetscMPIInt dummy1 = 1, dummy2;
614: #endif
616: // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
617: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
618: if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
619: #endif
620: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
621: #if defined(PETSC_HAVE_PTHREAD_MUTEX)
622: if (PCMPIServerUseShmget) {
623: /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
624: PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
625: pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
626: }
627: #endif
628: switch (request) {
629: case PCMPI_CREATE:
630: PetscCall(PCMPICreate(NULL));
631: break;
632: case PCMPI_SET_MAT:
633: PetscCall(PCMPISetMat(NULL));
634: break;
635: case PCMPI_UPDATE_MAT_VALUES:
636: PetscCall(PCMPIUpdateMatValues(NULL));
637: break;
638: case PCMPI_VIEW:
639: // PetscCall(PCMPIView(NULL));
640: break;
641: case PCMPI_SOLVE:
642: PetscCall(PCMPISolve(NULL, NULL, NULL));
643: break;
644: case PCMPI_DESTROY:
645: PetscCall(PCMPIDestroy(NULL));
646: break;
647: case PCMPI_EXIT:
648: if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
649: PetscCall(PetscFinalize());
650: exit(0); /* not sure if this is a good idea, but cannot return because that would run the user's main program */
651: break;
652: default:
653: break;
654: }
655: }
656: PetscFunctionReturn(PETSC_SUCCESS);
657: }
659: /*@C
660: PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
661: parallel `KSP` solves and management of parallel `KSP` objects.
663: Logically Collective on all MPI ranks except 0
665: Level: developer
667: Note:
668: This is normally called automatically in `PetscFinalize()`
670: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
671: @*/
672: PetscErrorCode PCMPIServerEnd(void)
673: {
674: PetscFunctionBegin;
675: if (PetscGlobalRank == 0) {
676: PetscViewer viewer = NULL;
677: PetscViewerFormat format;
679: PetscCall(PetscShmgetAddressesFinalize());
680: PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
681: if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
682: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
683: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
684: PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
685: PetscOptionsEnd();
686: if (viewer) {
687: PetscBool isascii;
689: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
690: if (isascii) {
691: PetscMPIInt size;
692: PetscMPIInt i;
694: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
695: PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
696: PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
697: if (PCMPIKSPCountsSeq) {
698: PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
699: }
700: for (i = 0; i < size; i++) {
701: if (PCMPIKSPCounts[i]) {
702: PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
703: }
704: }
705: PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
706: }
707: PetscCall(PetscViewerDestroy(&viewer));
708: }
709: }
710: PetscCall(PCMPICommsDestroy());
711: PCMPIServerActive = PETSC_FALSE;
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: /*
716: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
717: because, for example, the problem is small. This version is more efficient because it does not require copying any data
718: */
719: static PetscErrorCode PCSetUp_Seq(PC pc)
720: {
721: PC_MPI *km = (PC_MPI *)pc->data;
722: Mat sA;
723: const char *prefix;
724: char *found = NULL, *cprefix;
726: PetscFunctionBegin;
727: PCMPIServerInSolve = PETSC_TRUE;
728: PetscCall(PCGetOperators(pc, NULL, &sA));
729: PetscCall(PCGetOptionsPrefix(pc, &prefix));
730: PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
731: PetscCall(KSPSetNestLevel(km->ksps[0], 1));
732: PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
734: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
735: PetscCall(PCGetOptionsPrefix(pc, &prefix));
736: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
737: PetscCall(PetscStrallocpy(prefix, &cprefix));
738: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
739: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
740: *found = 0;
741: PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
742: PetscCall(PetscFree(cprefix));
744: PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
745: PetscCall(KSPSetFromOptions(km->ksps[0]));
746: PetscCall(KSPSetUp(km->ksps[0]));
747: PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
748: PCMPIKSPCountsSeq++;
749: PCMPIServerInSolve = PETSC_FALSE;
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }
753: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
754: {
755: PC_MPI *km = (PC_MPI *)pc->data;
756: PetscInt its, n;
757: Mat A;
759: PetscFunctionBegin;
760: PCMPIServerInSolve = PETSC_TRUE;
761: PetscCall(KSPSolve(km->ksps[0], b, x));
762: PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
763: PCMPISolveCountsSeq++;
764: PCMPIIterationsSeq += its;
765: PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
766: PetscCall(MatGetSize(A, &n, NULL));
767: PCMPISizesSeq += n;
768: PCMPIServerInSolve = PETSC_FALSE;
769: /*
770: do not keep a reference to the previous rhs and solution since destroying them in the next KSPSolve()
771: may use PetscFree() instead of PCMPIArrayDeallocate()
772: */
773: PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
774: PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
775: PetscFunctionReturn(PETSC_SUCCESS);
776: }
778: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
779: {
780: PC_MPI *km = (PC_MPI *)pc->data;
782: PetscFunctionBegin;
783: PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
784: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
785: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
786: PetscFunctionReturn(PETSC_SUCCESS);
787: }
789: static PetscErrorCode PCDestroy_Seq(PC pc)
790: {
791: PC_MPI *km = (PC_MPI *)pc->data;
792: Mat A, B;
793: Vec x, b;
795: PetscFunctionBegin;
796: PCMPIServerInSolve = PETSC_TRUE;
797: /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
798: PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
799: PetscCall(PetscObjectReference((PetscObject)A));
800: PetscCall(PetscObjectReference((PetscObject)B));
801: PetscCall(KSPGetSolution(km->ksps[0], &x));
802: PetscCall(PetscObjectReference((PetscObject)x));
803: PetscCall(KSPGetRhs(km->ksps[0], &b));
804: PetscCall(PetscObjectReference((PetscObject)b));
805: PetscCall(KSPDestroy(&km->ksps[0]));
806: PetscCall(PetscFree(pc->data));
807: PCMPIServerInSolve = PETSC_FALSE;
808: PetscCall(MatDestroy(&A));
809: PetscCall(MatDestroy(&B));
810: PetscCall(VecDestroy(&x));
811: PetscCall(VecDestroy(&b));
812: PetscFunctionReturn(PETSC_SUCCESS);
813: }
815: /*
816: PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
817: right-hand side to the parallel PC
818: */
819: static PetscErrorCode PCSetUp_MPI(PC pc)
820: {
821: PC_MPI *km = (PC_MPI *)pc->data;
822: PetscMPIInt rank, size;
823: PetscBool newmatrix = PETSC_FALSE;
825: PetscFunctionBegin;
826: PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
827: PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
828: PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
830: if (!pc->setupcalled) {
831: if (!km->alwaysuseserver) {
832: PetscInt n;
833: Mat sA;
834: /* short circuit for small systems */
835: PetscCall(PCGetOperators(pc, &sA, &sA));
836: PetscCall(MatGetSize(sA, &n, NULL));
837: if (n < 2 * km->mincntperrank - 1 || size == 1) {
838: pc->ops->setup = NULL;
839: pc->ops->apply = PCApply_Seq;
840: pc->ops->destroy = PCDestroy_Seq;
841: pc->ops->view = PCView_Seq;
842: PetscCall(PCSetUp_Seq(pc));
843: PetscFunctionReturn(PETSC_SUCCESS);
844: }
845: }
847: PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
848: PetscCall(PCMPICreate(pc));
849: newmatrix = PETSC_TRUE;
850: }
851: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
853: if (newmatrix) {
854: PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
855: PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
856: PetscCall(PCMPISetMat(pc));
857: } else {
858: PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
859: PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
860: PetscCall(PCMPIUpdateMatValues(pc));
861: }
862: PetscFunctionReturn(PETSC_SUCCESS);
863: }
865: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
866: {
867: PetscFunctionBegin;
868: PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
869: PetscCall(PCMPISolve(pc, b, x));
870: PetscFunctionReturn(PETSC_SUCCESS);
871: }
873: static PetscErrorCode PCDestroy_MPI(PC pc)
874: {
875: PetscFunctionBegin;
876: PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
877: PetscCall(PCMPIDestroy(pc));
878: PetscCall(PetscFree(pc->data));
879: PetscFunctionReturn(PETSC_SUCCESS);
880: }
882: /*
883: PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer; use the options database instead
884: */
885: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
886: {
887: PC_MPI *km = (PC_MPI *)pc->data;
888: MPI_Comm comm;
889: PetscMPIInt size;
891: PetscFunctionBegin;
892: PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
893: PetscCallMPI(MPI_Comm_size(comm, &size));
894: PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
895: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
896: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
897: PetscFunctionReturn(PETSC_SUCCESS);
898: }
900: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
901: {
902: PC_MPI *km = (PC_MPI *)pc->data;
904: PetscFunctionBegin;
905: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
906: PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
907: PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
908: PetscOptionsHeadEnd();
909: PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: /*MC
913: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
915: Options Database Keys for the Server:
916: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
917: . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
918: - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
920: Options Database Keys for a specific `KSP` object
921: + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
922: - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
924: Level: developer
926: Notes:
927: This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
928: `MatCreateSeqAIJWithArrays()`
930: The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
932: It can be particularly useful for user OpenMP code or potentially user GPU code.
934: When the program is run with a single MPI process it directly uses the provided matrix and right-hand side
935: and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
937: The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
938: because they are not the actual solver objects.
940: When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
941: stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentage of time in the events will be misleading.
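   Example Usage:
   A minimal sketch of a user program run in MPI linear solver server mode; matrix and vector assembly,
   declarations, and error checking are omitted and all variable names are illustrative.
.vb
   // launched as:  mpiexec -n 8 ./myapp -mpi_linear_solver_server
   // A is a MATSEQAIJ matrix and b, x are sequential vectors assembled by the user code
   KSPCreate(PETSC_COMM_WORLD, &ksp);   // PETSC_COMM_WORLD is PETSC_COMM_SELF on rank 0 in server mode
   KSPSetOperators(ksp, A, A);
   KSPSetFromOptions(ksp);
   KSPSolve(ksp, b, x);                 // the solve itself runs on all eight MPI processes
   KSPDestroy(&ksp);
.ve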
943: Developer Note:
944: This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
945: a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
947: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
948: M*/
949: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
950: {
951: PC_MPI *km;
952: char *found = NULL;
954: PetscFunctionBegin;
955: PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
956: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
958: /* material from PCSetType() */
959: PetscTryTypeMethod(pc, destroy);
960: pc->ops->destroy = NULL;
961: pc->data = NULL;
963: PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
964: PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
965: pc->modifysubmatrices = NULL;
966: pc->modifysubmatricesP = NULL;
967: pc->setupcalled = 0;
969: PetscCall(PetscNew(&km));
970: pc->data = (void *)km;
972: km->mincntperrank = 10000;
974: pc->ops->setup = PCSetUp_MPI;
975: pc->ops->apply = PCApply_MPI;
976: pc->ops->destroy = PCDestroy_MPI;
977: pc->ops->view = PCView_MPI;
978: pc->ops->setfromoptions = PCSetFromOptions_MPI;
979: PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
980: PetscFunctionReturn(PETSC_SUCCESS);
981: }
983: /*@
984: PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
986: Not Collective
988: Input Parameter:
989: . pc - the preconditioner context
991: Output Parameter:
992: . innerksp - the inner `KSP`
994: Level: advanced
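   Example Usage:
   A minimal sketch, assuming `pc` is the `PCMPI` inside the outer `KSP`; the variable names are illustrative.
.vb
   KSP      inner;
   PetscInt its;

   PCMPIGetKSP(pc, &inner);
   KSPGetIterationNumber(inner, &its);
.ve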
996: .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
997: @*/
998: PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
999: {
1000: PC_MPI *red = (PC_MPI *)pc->data;
1002: PetscFunctionBegin;
1004: PetscAssertPointer(innerksp, 2);
1005: *innerksp = red->ksps[0];
1006: PetscFunctionReturn(PETSC_SUCCESS);
1007: }