Actual source code: mumps.c
1: /*
2: Provides an interface to the MUMPS sparse solver
3: */
4: #include <petscpkg_version.h>
5: #include <petscsf.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
9: #include <petsc/private/vecimpl.h>
11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
13: EXTERN_C_BEGIN
14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15: #include <cmumps_c.h>
16: #include <zmumps_c.h>
17: #include <smumps_c.h>
18: #include <dmumps_c.h>
19: #else
20: #if defined(PETSC_USE_COMPLEX)
21: #if defined(PETSC_USE_REAL_SINGLE)
22: #include <cmumps_c.h>
23: #define MUMPS_c cmumps_c
24: #define MumpsScalar CMUMPS_COMPLEX
25: #else
26: #include <zmumps_c.h>
27: #define MUMPS_c zmumps_c
28: #define MumpsScalar ZMUMPS_COMPLEX
29: #endif
30: #else
31: #if defined(PETSC_USE_REAL_SINGLE)
32: #include <smumps_c.h>
33: #define MUMPS_c smumps_c
34: #define MumpsScalar SMUMPS_REAL
35: #else
36: #include <dmumps_c.h>
37: #define MUMPS_c dmumps_c
38: #define MumpsScalar DMUMPS_REAL
39: #endif
40: #endif
41: #endif
42: #if defined(PETSC_USE_COMPLEX)
43: #if defined(PETSC_USE_REAL_SINGLE)
44: #define MUMPS_STRUC_C CMUMPS_STRUC_C
45: #else
46: #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47: #endif
48: #else
49: #if defined(PETSC_USE_REAL_SINGLE)
50: #define MUMPS_STRUC_C SMUMPS_STRUC_C
51: #else
52: #define MUMPS_STRUC_C DMUMPS_STRUC_C
53: #endif
54: #endif
55: EXTERN_C_END
57: #define JOB_INIT -1
58: #define JOB_NULL 0
59: #define JOB_FACTSYMBOLIC 1
60: #define JOB_FACTNUMERIC 2
61: #define JOB_SOLVE 3
62: #define JOB_END -2
64: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66: naming convention in PetscMPIInt, PetscBLASInt etc.
67: */
68: typedef MUMPS_INT PetscMUMPSInt;
70: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
72: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73: #endif
74: #else
75: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
76: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77: #endif
78: #endif
80: #define MPIU_MUMPSINT MPI_INT
81: #define PETSC_MUMPS_INT_MAX 2147483647
82: #define PETSC_MUMPS_INT_MIN -2147483648
84: /* Cast PetscCount/PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
85: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
86: {
87: PetscFunctionBegin;
88: #if PetscDefined(USE_64BIT_INDICES)
89: PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
90: #endif
91: *b = (PetscMUMPSInt)a;
92: PetscFunctionReturn(PETSC_SUCCESS);
93: }
95: /* Put these utility routines here since they are only used in this file */
96: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97: {
98: PetscInt myval;
99: PetscBool myset;
101: PetscFunctionBegin;
102: /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103: PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104: if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105: if (set) *set = myset;
106: PetscFunctionReturn(PETSC_SUCCESS);
107: }
108: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
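/*
   A minimal usage sketch of the PetscOptionsMUMPSInt() helper (the option name, description and surrounding
   objects below are illustrative, not a verbatim excerpt from this file). It must appear between
   PetscOptionsBegin()/PetscOptionsEnd(), which provide the PetscOptionsObject referenced by the macro:

     PetscMUMPSInt icntl;
     PetscBool     flg;

     PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): null pivot detection", "None", mumps->id.ICNTL(24), &icntl, &flg));
     if (flg) mumps->id.ICNTL(24) = icntl;
     PetscOptionsEnd();
*/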
110: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111: //
112: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113: // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
114: // arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
115: //
116: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
117: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118: // Also note that we added some *_len fields, not present in the specific types, to track the sizes of those MumpsScalar buffers.
119: typedef struct {
120: PetscPrecision precision; // precision used by MUMPS
121: void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
123: // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124: MUMPS_INT sym, par, job;
125: MUMPS_INT comm_fortran; /* Fortran communicator */
126: MUMPS_INT *icntl;
127: void *cntl; // MumpsReal, fixed size array
128: MUMPS_INT n;
129: MUMPS_INT nblk;
131: /* Assembled entry */
132: MUMPS_INT8 nnz;
133: MUMPS_INT *irn;
134: MUMPS_INT *jcn;
135: void *a; // MumpsScalar, centralized input
136: PetscCount a_len;
138: /* Distributed entry */
139: MUMPS_INT8 nnz_loc;
140: MUMPS_INT *irn_loc;
141: MUMPS_INT *jcn_loc;
142: void *a_loc; // MumpsScalar, distributed input
143: PetscCount a_loc_len;
145: /* Matrix by blocks */
146: MUMPS_INT *blkptr;
147: MUMPS_INT *blkvar;
149: /* Ordering, if given by user */
150: MUMPS_INT *perm_in;
152: /* RHS, solution, output data and statistics */
153: void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers
154: PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
156: MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157: MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158: // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159: MUMPS_INT schur_lld;
160: MUMPS_INT *info, *infog; // fixed size array
161: void *rinfo, *rinfog; // MumpsReal, fixed size array
163: /* Null space */
164: MUMPS_INT *pivnul_list; // allocated by MUMPS!
165: MUMPS_INT *mapping; // allocated by MUMPS!
167: /* Schur */
168: MUMPS_INT size_schur;
169: MUMPS_INT *listvar_schur;
170: void *schur; // MumpsScalar
171: PetscCount schur_len;
173: /* For out-of-core */
174: char *ooc_tmpdir; // fixed size array
175: char *ooc_prefix; // fixed size array
176: } XMUMPS_STRUC_C;
178: // Note: the fixed-size arrays live inside the inner MUMPS struct; redirect the outer struct's pointers to them
179: #define AllocateInternalID(MUMPS_STRUC_T, outer) \
180: do { \
181: MUMPS_STRUC_T *inner; \
182: PetscCall(PetscNew(&inner)); \
183: outer->icntl = inner->icntl; \
184: outer->cntl = inner->cntl; \
185: outer->info = inner->info; \
186: outer->infog = inner->infog; \
187: outer->rinfo = inner->rinfo; \
188: outer->rinfog = inner->rinfog; \
189: outer->ooc_tmpdir = inner->ooc_tmpdir; \
190: outer->ooc_prefix = inner->ooc_prefix; \
191: /* these three fields should never change after init */ \
192: inner->comm_fortran = outer->comm_fortran; \
193: inner->par = outer->par; \
194: inner->sym = outer->sym; \
195: outer->internal_id = inner; \
196: } while (0)
198: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200: {
201: PetscFunctionBegin;
202: outer->precision = precision;
203: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204: #if defined(PETSC_USE_COMPLEX)
205: if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
206: else AllocateInternalID(ZMUMPS_STRUC_C, outer);
207: #else
208: if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
209: else AllocateInternalID(DMUMPS_STRUC_C, outer);
210: #endif
211: #else
212: AllocateInternalID(MUMPS_STRUC_C, outer);
213: #endif
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
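/*
   A sketch of the intended lifecycle of the outer/inner pair (simplified; the exact communicator handling is
   an assumption here, not a verbatim excerpt from this file):

     XMUMPS_STRUC_C id = {0};
     id.comm_fortran = MPI_Comm_c2f(comm);   // set before allocation; these fields never change afterwards
     id.par          = 1;
     id.sym          = 0;
     PetscCall(MatMumpsAllocateInternalID(&id, PETSC_PRECISION_DOUBLE));
     // ... set id.job, id.icntl[], matrix/RHS pointers, call MUMPS via PetscCallMumps_Private(&id) ...
     PetscCall(MatMumpsFreeInternalID(&id));
*/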
217: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218: do { \
219: MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220: PetscCall(PetscFree(inner->a)); \
221: PetscCall(PetscFree(inner->a_loc)); \
222: PetscCall(PetscFree(inner->redrhs)); \
223: PetscCall(PetscFree(inner->rhs)); \
224: PetscCall(PetscFree(inner->rhs_sparse)); \
225: PetscCall(PetscFree(inner->rhs_loc)); \
226: PetscCall(PetscFree(inner->sol_loc)); \
227: PetscCall(PetscFree(inner->schur)); \
228: } while (0)
230: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231: {
232: PetscFunctionBegin;
233: if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
234: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235: const PetscPrecision mumps_precision = outer->precision;
236: if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237: #if defined(PETSC_USE_COMPLEX)
238: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239: else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240: #else
241: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242: else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243: #endif
244: }
245: #endif
246: PetscCall(PetscFree(outer->internal_id));
247: }
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: // Make a companion MumpsScalar array (with a given PetscScalar array <pa>), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254: // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255: //
256: // The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same.
257: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259: //
260: //
261: // Input parameters:
262: // + convert - whether to do PetscScalar to MumpsScalar conversion
263: // . n - length of the PetscScalar array
264: // . pa - [n], points to the PetscScalar array
265: // . precision - precision of MumpsScalar
266: // . m - on input, length of an existing MumpsScalar array if any, otherwise *m is just zero.
267: // - ma - on input, an existing MumpsScalar array if any.
268: //
269: // Output parameters:
270: // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271: // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272: //
273: // Note:
274: // New memory, if allocated, is done via PetscMalloc1(), and is owned by the caller.
275: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276: {
277: PetscFunctionBegin;
278: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279: const PetscPrecision mumps_precision = precision;
280: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
281: #if defined(PETSC_USE_COMPLEX)
282: if (mumps_precision != PETSC_SCALAR_PRECISION) {
283: if (mumps_precision == PETSC_PRECISION_SINGLE) {
284: if (*m < n) {
285: PetscCall(PetscFree(*ma));
286: PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287: *m = n;
288: }
289: if (convert) {
290: CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291: for (PetscCount i = 0; i < n; i++) {
292: b[i].r = PetscRealPart(pa[i]);
293: b[i].i = PetscImaginaryPart(pa[i]);
294: }
295: }
296: } else {
297: if (*m < n) {
298: PetscCall(PetscFree(*ma));
299: PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300: *m = n;
301: }
302: if (convert) {
303: ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304: for (PetscCount i = 0; i < n; i++) {
305: b[i].r = PetscRealPart(pa[i]);
306: b[i].i = PetscImaginaryPart(pa[i]);
307: }
308: }
309: }
310: }
311: #else
312: if (mumps_precision != PETSC_SCALAR_PRECISION) {
313: if (mumps_precision == PETSC_PRECISION_SINGLE) {
314: if (*m < n) {
315: PetscCall(PetscFree(*ma));
316: PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317: *m = n;
318: }
319: if (convert) {
320: SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322: }
323: } else {
324: if (*m < n) {
325: PetscCall(PetscFree(*ma));
326: PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327: *m = n;
328: }
329: if (convert) {
330: DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332: }
333: }
334: }
335: #endif
336: else
337: #endif
338: {
339: if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340: *ma = (void *)pa; // same precision, make them alias
341: *m = 0;
342: }
343: PetscFunctionReturn(PETSC_SUCCESS);
344: }
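/*
   A sketch of how a companion buffer is typically obtained (variable names are illustrative; rhs and n are
   assumed given):

     PetscScalar *rhs;            // n entries in PETSc's precision
     PetscCount   len  = 0;       // no MumpsScalar buffer exists yet
     void        *mrhs = NULL;

     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, rhs, mumps->id.precision, &len, &mrhs));
     // same precision:      mrhs aliases rhs and len stays 0 (nothing extra to free)
     // different precision: mrhs is a new PetscMalloc1()'d buffer of len >= n MumpsScalars holding converted values
*/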
346: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
347: //
348: // 1) If the two types are different, cast array elements.
349: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351: {
352: PetscFunctionBegin;
353: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354: if (mumps_precision != PETSC_SCALAR_PRECISION) {
355: #if defined(PETSC_USE_COMPLEX)
356: if (mumps_precision == PETSC_PRECISION_SINGLE) {
357: PetscReal *a = (PetscReal *)pa;
358: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360: } else {
361: PetscReal *a = (PetscReal *)pa;
362: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364: }
365: #else
366: if (mumps_precision == PETSC_PRECISION_SINGLE) {
367: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369: } else {
370: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372: }
373: #endif
374: } else
375: #endif
376: PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381: //
382: // 1) If the two types are different, cast array elements.
383: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385: {
386: PetscFunctionBegin;
387: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388: if (mumps_precision != PETSC_SCALAR_PRECISION) {
389: #if defined(PETSC_USE_COMPLEX)
390: if (mumps_precision == PETSC_PRECISION_SINGLE) {
391: CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392: for (PetscCount i = 0; i < n; i++) {
393: b[i].r = PetscRealPart(pa[i]);
394: b[i].i = PetscImaginaryPart(pa[i]);
395: }
396: } else {
397: ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398: for (PetscCount i = 0; i < n; i++) {
399: b[i].r = PetscRealPart(pa[i]);
400: b[i].i = PetscImaginaryPart(pa[i]);
401: }
402: }
403: #else
404: if (mumps_precision == PETSC_PRECISION_SINGLE) {
405: SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407: } else {
408: DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410: }
411: #endif
412: } else
413: #endif
414: PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419: {
420: return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421: }
423: #define PreMumpsCall(inner, outer, mumpsscalar) \
424: do { \
425: inner->job = outer->job; \
426: inner->n = outer->n; \
427: inner->nblk = outer->nblk; \
428: inner->nnz = outer->nnz; \
429: inner->irn = outer->irn; \
430: inner->jcn = outer->jcn; \
431: inner->a = (mumpsscalar *)outer->a; \
432: inner->nnz_loc = outer->nnz_loc; \
433: inner->irn_loc = outer->irn_loc; \
434: inner->jcn_loc = outer->jcn_loc; \
435: inner->a_loc = (mumpsscalar *)outer->a_loc; \
436: inner->blkptr = outer->blkptr; \
437: inner->blkvar = outer->blkvar; \
438: inner->perm_in = outer->perm_in; \
439: inner->rhs = (mumpsscalar *)outer->rhs; \
440: inner->redrhs = (mumpsscalar *)outer->redrhs; \
441: inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \
442: inner->sol_loc = (mumpsscalar *)outer->sol_loc; \
443: inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \
444: inner->irhs_sparse = outer->irhs_sparse; \
445: inner->irhs_ptr = outer->irhs_ptr; \
446: inner->isol_loc = outer->isol_loc; \
447: inner->irhs_loc = outer->irhs_loc; \
448: inner->nrhs = outer->nrhs; \
449: inner->lrhs = outer->lrhs; \
450: inner->lredrhs = outer->lredrhs; \
451: inner->nz_rhs = outer->nz_rhs; \
452: inner->lsol_loc = outer->lsol_loc; \
453: inner->nloc_rhs = outer->nloc_rhs; \
454: inner->lrhs_loc = outer->lrhs_loc; \
455: inner->schur_lld = outer->schur_lld; \
456: inner->size_schur = outer->size_schur; \
457: inner->listvar_schur = outer->listvar_schur; \
458: inner->schur = (mumpsscalar *)outer->schur; \
459: } while (0)
461: #define PostMumpsCall(inner, outer) \
462: do { \
463: outer->pivnul_list = inner->pivnul_list; \
464: outer->mapping = inner->mapping; \
465: } while (0)
467: // Entry point for PETSc to call MUMPS
468: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469: {
470: PetscFunctionBegin;
471: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472: #if defined(PETSC_USE_COMPLEX)
473: if (outer->precision == PETSC_PRECISION_SINGLE) {
474: CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475: PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476: PetscCallExternalVoid("cmumps_c", cmumps_c(inner));
477: PostMumpsCall(inner, outer);
478: } else {
479: ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480: PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481: PetscCallExternalVoid("zmumps_c", zmumps_c(inner));
482: PostMumpsCall(inner, outer);
483: }
484: #else
485: if (outer->precision == PETSC_PRECISION_SINGLE) {
486: SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487: PreMumpsCall(inner, outer, SMUMPS_REAL);
488: PetscCallExternalVoid("smumps_c", smumps_c(inner));
489: PostMumpsCall(inner, outer);
490: } else {
491: DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492: PreMumpsCall(inner, outer, DMUMPS_REAL);
493: PetscCallExternalVoid("dmumps_c", dmumps_c(inner));
494: PostMumpsCall(inner, outer);
495: }
496: #endif
497: #else
498: MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499: PreMumpsCall(inner, outer, MumpsScalar);
500: PetscCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501: PostMumpsCall(inner, outer);
502: #endif
503: PetscFunctionReturn(PETSC_SUCCESS);
504: }
506: /* macros s.t. indices match MUMPS documentation */
507: #define ICNTL(I) icntl[(I) - 1]
508: #define INFOG(I) infog[(I) - 1]
509: #define INFO(I) info[(I) - 1]
511: // Get a value from a MumpsScalar array, which is the field <F> in the struct <ID> of XMUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
512: #if defined(PETSC_USE_COMPLEX)
513: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514: #else
515: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516: #endif
518: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519: #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521: #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])
523: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal
524: #define ID_CNTL_SET(ID, I, VAL) \
525: do { \
526: if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527: else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528: } while (0)
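/*
   These accessors hide the runtime precision of the MumpsReal/MumpsScalar arrays, for example (indices chosen
   arbitrarily for illustration):

     ID_CNTL_SET(mumps->id, 2, (PetscReal)1e-6);      // store a PetscReal into CNTL(2) as float or double
     PetscReal r = ID_RINFOG_GET(mumps->id, 6);       // read RINFOG(6) back as a PetscReal
*/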
530: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
531: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532: #define PetscMUMPS_c(mumps) \
533: do { \
534: if (mumps->use_petsc_omp_support) { \
535: if (mumps->is_omp_master) { \
536: PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538: PetscCall(PetscCallMumps_Private(&mumps->id)); \
539: PetscCall(PetscFPTrapPop()); \
540: PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541: } \
542: PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543: /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
544: to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
545: an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
546: omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
547: */ \
548: MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549: PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550: PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551: PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552: PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553: } else { \
554: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555: PetscCall(PetscCallMumps_Private(&mumps->id)); \
556: PetscCall(PetscFPTrapPop()); \
557: } \
558: } while (0)
559: #else
560: #define PetscMUMPS_c(mumps) \
561: do { \
562: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563: PetscCall(PetscCallMumps_Private(&mumps->id)); \
564: PetscCall(PetscFPTrapPop()); \
565: } while (0)
566: #endif
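/*
   Shared code never touches the precision-specific inner struct directly; a typical call looks like the sketch
   below (abbreviated: the real checks in this file pass a full error message and MUMPS_MANUALS):

     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);   // PreMumpsCall() copies outer -> inner, {s,d,c,z}mumps_c() runs, PostMumpsCall() copies back
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d", mumps->id.INFOG(1));
*/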
568: typedef struct Mat_MUMPS Mat_MUMPS;
569: struct Mat_MUMPS {
570: XMUMPS_STRUC_C id;
572: MatStructure matstruc;
573: PetscMPIInt myid, petsc_size;
574: PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */
575: PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
576: PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
577: PetscMUMPSInt sym;
578: MPI_Comm mumps_comm;
579: PetscMUMPSInt *ICNTL_pre;
580: PetscReal *CNTL_pre;
581: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
582: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
583: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
584: PetscMUMPSInt ICNTL26;
585: PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
586: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587: PetscInt *rhs_nrow, max_nrhs;
588: PetscMPIInt *rhs_recvcounts, *rhs_disps;
589: PetscScalar *rhs_loc, *rhs_recvbuf;
590: #endif
591: Vec b_seq, x_seq;
592: PetscInt ninfo, *info; /* which INFO to display */
593: PetscInt sizeredrhs;
594: PetscScalar *schur_sol;
595: PetscInt schur_sizesol;
596: PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision
597: PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598: PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
599: PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
601: /* Support for MATNEST */
602: PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603: PetscCount *nest_vals_start;
604: PetscScalar *nest_vals;
606: /* stuff used by petsc/mumps OpenMP support */
607: PetscBool use_petsc_omp_support;
608: PetscOmpCtrl omp_ctrl; /* an OpenMP controller with which blocked processes will release their CPUs (MPI_Barrier does not have this guarantee) */
609: MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610: PetscCount *recvcount; /* a collection of nnz on omp_master */
611: PetscMPIInt tag, omp_comm_size;
612: PetscBool is_omp_master; /* is this rank the master of omp_comm */
613: MPI_Request *reqs;
614: };
616: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
618: */
619: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620: {
621: PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
623: PetscFunctionBegin;
624: #if defined(PETSC_USE_64BIT_INDICES)
625: {
626: if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
627: PetscCall(PetscFree(mumps->ia_alloc));
628: PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
629: mumps->cur_ilen = nrow + 1;
630: }
631: if (nnz > mumps->cur_jlen) {
632: PetscCall(PetscFree(mumps->ja_alloc));
633: PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
634: mumps->cur_jlen = nnz;
635: }
636: for (PetscInt i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
637: for (PetscInt i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
638: *ia_mumps = mumps->ia_alloc;
639: *ja_mumps = mumps->ja_alloc;
640: }
641: #else
642: *ia_mumps = ia;
643: *ja_mumps = ja;
644: #endif
645: PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
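/*
   Illustrative use with a tiny 1-based CSR (2 rows, 4 nonzeros); the arrays are hypothetical:

     PetscInt       ia[] = {1, 3, 5}, ja[] = {1, 2, 1, 2};
     PetscMUMPSInt *ia_m, *ja_m, nz_m;

     PetscCall(PetscMUMPSIntCSRCast(mumps, 2, ia, ja, &ia_m, &ja_m, &nz_m));   // nz_m becomes ia[2]-1 = 4
     // with 32-bit PetscInt, ia_m/ja_m simply alias ia/ja; with 64-bit PetscInt they point to mumps->ia_alloc/ja_alloc
*/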
649: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
650: {
651: PetscFunctionBegin;
652: PetscCall(PetscFree(mumps->id.listvar_schur));
653: PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
654: PetscCall(PetscFree(mumps->schur_sol));
655: mumps->id.size_schur = 0;
656: mumps->id.schur_lld = 0;
657: if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is not yet built
658: PetscFunctionReturn(PETSC_SUCCESS);
659: }
661: /* solve with rhs in mumps->id.redrhs and return in the same location */
662: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
663: {
664: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
665: Mat S, B, X; // solve S*X = B; all three matrices are dense
666: MatFactorSchurStatus schurstatus;
667: PetscInt sizesol;
668: const PetscScalar *xarray;
670: PetscFunctionBegin;
671: PetscCall(MatFactorFactorizeSchurComplement(F));
672: PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
673: PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
675: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
676: PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
677: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
678: PetscCall(MatBindToCPU(B, S->boundtocpu));
679: #endif
680: switch (schurstatus) {
681: case MAT_FACTOR_SCHUR_FACTORED:
682: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
683: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
684: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
685: PetscCall(MatBindToCPU(X, S->boundtocpu));
686: #endif
687: if (!mumps->id.ICNTL(9)) { /* transpose solve */
688: PetscCall(MatMatSolveTranspose(S, B, X));
689: } else {
690: PetscCall(MatMatSolve(S, B, X));
691: }
692: break;
693: case MAT_FACTOR_SCHUR_INVERTED:
694: sizesol = mumps->id.nrhs * mumps->id.size_schur;
695: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
696: PetscCall(PetscFree(mumps->schur_sol));
697: PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
698: mumps->schur_sizesol = sizesol;
699: }
700: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
701: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
702: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
703: PetscCall(MatBindToCPU(X, S->boundtocpu));
704: #endif
705: PetscCall(MatProductCreateWithMat(S, B, NULL, X));
706: if (!mumps->id.ICNTL(9)) { /* transpose solve */
707: PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
708: } else {
709: PetscCall(MatProductSetType(X, MATPRODUCT_AB));
710: }
711: PetscCall(MatProductSetFromOptions(X));
712: PetscCall(MatProductSymbolic(X));
713: PetscCall(MatProductNumeric(X));
715: PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
716: break;
717: default:
718: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
719: }
720: // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
721: PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
722: PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
723: PetscCall(MatDenseRestoreArrayRead(X, &xarray));
724: PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
725: PetscCall(MatDestroy(&B));
726: PetscCall(MatDestroy(&X));
727: PetscFunctionReturn(PETSC_SUCCESS);
728: }
730: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
731: {
732: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
734: PetscFunctionBegin;
735: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
736: PetscFunctionReturn(PETSC_SUCCESS);
737: }
738: if (!expansion) { /* prepare for the condensation step */
739: PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
740: /* allocate MUMPS internal array to store reduced right-hand sides */
741: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
742: mumps->id.lredrhs = mumps->id.size_schur;
743: mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
744: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
745: PetscCall(PetscFree(mumps->redrhs));
746: PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
747: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
748: }
749: } else { /* prepare for the expansion step */
750: PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
751: mumps->id.ICNTL(26) = 2; /* expansion phase */
752: PetscMUMPS_c(mumps);
753: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
754: /* restore defaults */
755: mumps->id.ICNTL(26) = -1;
756: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
757: if (mumps->id.nrhs > 1) {
758: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
759: PetscCall(PetscFree(mumps->redrhs));
760: mumps->id.redrhs_len = 0;
761: mumps->id.lredrhs = 0;
762: mumps->sizeredrhs = 0;
763: }
764: }
765: PetscFunctionReturn(PETSC_SUCCESS);
766: }
768: /*
769: MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
771: input:
772: A - matrix in aij, baij or sbaij format
773: shift - 0: C style output triple; 1: Fortran style output triple.
774: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
775: MAT_REUSE_MATRIX: only the values in v array are updated
776: output:
777: nnz - dim of r, c, and v (number of local nonzero entries of A)
778: r, c, v - row and col index, matrix values (matrix triples)
780: The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
781: freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
782: that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784: */
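/*
   Example of the triplet layout these routines produce, using shift = 1 (Fortran-style indices) and the 2x2
   matrix [a 0; b c] stored in SeqAIJ format (entry names are for illustration only):

     r (irn) = {1, 2, 2}
     c (jcn) = {1, 1, 2}
     v (val) = {a, b, c}

   Entries are emitted row by row, following the traversal of the AIJ arrays.
*/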
786: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
787: {
788: const PetscScalar *av;
789: const PetscInt *ai, *aj, *ajj, M = A->rmap->n;
790: PetscCount nz, rnz, k;
791: PetscMUMPSInt *row, *col;
792: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
794: PetscFunctionBegin;
795: PetscCall(MatSeqAIJGetArrayRead(A, &av));
796: if (reuse == MAT_INITIAL_MATRIX) {
797: nz = aa->nz;
798: ai = aa->i;
799: aj = aa->j;
800: PetscCall(PetscMalloc2(nz, &row, nz, &col));
801: for (PetscCount i = k = 0; i < M; i++) {
802: rnz = ai[i + 1] - ai[i];
803: ajj = aj + ai[i];
804: for (PetscCount j = 0; j < rnz; j++) {
805: PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
806: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
807: k++;
808: }
809: }
810: mumps->val = (PetscScalar *)av;
811: mumps->irn = row;
812: mumps->jcn = col;
813: mumps->nnz = nz;
814: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
815: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
816: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
817: PetscFunctionReturn(PETSC_SUCCESS);
818: }
820: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
821: {
822: PetscCount nz, i, j, k, r;
823: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
824: PetscMUMPSInt *row, *col;
826: PetscFunctionBegin;
827: nz = a->sliidx[a->totalslices];
828: if (reuse == MAT_INITIAL_MATRIX) {
829: PetscCall(PetscMalloc2(nz, &row, nz, &col));
830: for (i = k = 0; i < a->totalslices; i++) {
831: for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
832: }
833: for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
834: mumps->irn = row;
835: mumps->jcn = col;
836: mumps->nnz = nz;
837: mumps->val = a->val;
838: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
839: else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
840: PetscFunctionReturn(PETSC_SUCCESS);
841: }
843: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
844: {
845: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
846: const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
847: PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
848: PetscInt bs;
849: PetscMUMPSInt *row, *col;
851: PetscFunctionBegin;
852: if (reuse == MAT_INITIAL_MATRIX) {
853: PetscCall(MatGetBlockSize(A, &bs));
854: M = A->rmap->N / bs;
855: ai = aa->i;
856: aj = aa->j;
857: PetscCall(PetscMalloc2(nz, &row, nz, &col));
858: for (i = 0; i < M; i++) {
859: ajj = aj + ai[i];
860: rnz = ai[i + 1] - ai[i];
861: for (k = 0; k < rnz; k++) {
862: for (j = 0; j < bs; j++) {
863: for (m = 0; m < bs; m++) {
864: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
865: PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
866: idx++;
867: }
868: }
869: }
870: }
871: mumps->irn = row;
872: mumps->jcn = col;
873: mumps->nnz = nz;
874: mumps->val = aa->a;
875: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
876: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
877: PetscFunctionReturn(PETSC_SUCCESS);
878: }
880: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
881: {
882: const PetscInt *ai, *aj, *ajj;
883: PetscInt bs;
884: PetscCount nz, rnz, i, j, k, m;
885: PetscMUMPSInt *row, *col;
886: PetscScalar *val;
887: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
888: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
889: #if defined(PETSC_USE_COMPLEX)
890: PetscBool isset, hermitian;
891: #endif
893: PetscFunctionBegin;
894: #if defined(PETSC_USE_COMPLEX)
895: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
896: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
897: #endif
898: ai = aa->i;
899: aj = aa->j;
900: PetscCall(MatGetBlockSize(A, &bs));
901: if (reuse == MAT_INITIAL_MATRIX) {
902: const PetscCount alloc_size = aa->nz * bs2;
904: PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
905: if (bs > 1) {
906: PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
907: mumps->val = mumps->val_alloc;
908: } else {
909: mumps->val = aa->a;
910: }
911: mumps->irn = row;
912: mumps->jcn = col;
913: } else {
914: row = mumps->irn;
915: col = mumps->jcn;
916: }
917: val = mumps->val;
919: nz = 0;
920: if (bs > 1) {
921: for (i = 0; i < mbs; i++) {
922: rnz = ai[i + 1] - ai[i];
923: ajj = aj + ai[i];
924: for (j = 0; j < rnz; j++) {
925: for (k = 0; k < bs; k++) {
926: for (m = 0; m < bs; m++) {
927: if (ajj[j] > i || k >= m) {
928: if (reuse == MAT_INITIAL_MATRIX) {
929: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
930: PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
931: }
932: val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
933: }
934: }
935: }
936: }
937: }
938: } else if (reuse == MAT_INITIAL_MATRIX) {
939: for (i = 0; i < mbs; i++) {
940: rnz = ai[i + 1] - ai[i];
941: ajj = aj + ai[i];
942: for (j = 0; j < rnz; j++) {
943: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
944: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
945: nz++;
946: }
947: }
948: PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
949: } else if (mumps->nest_vals)
950: PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
951: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
952: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
953: PetscFunctionReturn(PETSC_SUCCESS);
954: }
956: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
957: {
958: const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n;
959: PetscCount nz, rnz, i, j;
960: const PetscScalar *av, *v1;
961: PetscScalar *val;
962: PetscMUMPSInt *row, *col;
963: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
964: PetscBool diagDense;
965: #if defined(PETSC_USE_COMPLEX)
966: PetscBool hermitian, isset;
967: #endif
969: PetscFunctionBegin;
970: #if defined(PETSC_USE_COMPLEX)
971: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
972: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
973: #endif
974: PetscCall(MatSeqAIJGetArrayRead(A, &av));
975: ai = aa->i;
976: aj = aa->j;
977: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
978: if (reuse == MAT_INITIAL_MATRIX) {
979: /* count nz in the upper triangular part of A */
980: nz = 0;
981: if (!diagDense) {
982: for (i = 0; i < M; i++) {
983: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
984: for (j = ai[i]; j < ai[i + 1]; j++) {
985: if (aj[j] < i) continue;
986: nz++;
987: }
988: } else {
989: nz += ai[i + 1] - adiag[i];
990: }
991: }
992: } else {
993: for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
994: }
995: PetscCall(PetscMalloc2(nz, &row, nz, &col));
996: PetscCall(PetscMalloc1(nz, &val));
997: mumps->nnz = nz;
998: mumps->irn = row;
999: mumps->jcn = col;
1000: mumps->val = mumps->val_alloc = val;
1002: nz = 0;
1003: if (!diagDense) {
1004: for (i = 0; i < M; i++) {
1005: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1006: for (j = ai[i]; j < ai[i + 1]; j++) {
1007: if (aj[j] < i) continue;
1008: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1009: PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1010: val[nz] = av[j];
1011: nz++;
1012: }
1013: } else {
1014: rnz = ai[i + 1] - adiag[i];
1015: ajj = aj + adiag[i];
1016: v1 = av + adiag[i];
1017: for (j = 0; j < rnz; j++) {
1018: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1019: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1020: val[nz++] = v1[j];
1021: }
1022: }
1023: }
1024: } else {
1025: for (i = 0; i < M; i++) {
1026: rnz = ai[i + 1] - adiag[i];
1027: ajj = aj + adiag[i];
1028: v1 = av + adiag[i];
1029: for (j = 0; j < rnz; j++) {
1030: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1031: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1032: val[nz++] = v1[j];
1033: }
1034: }
1035: }
1036: } else {
1037: nz = 0;
1038: val = mumps->val;
1039: if (!diagDense) {
1040: for (i = 0; i < M; i++) {
1041: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1042: for (j = ai[i]; j < ai[i + 1]; j++) {
1043: if (aj[j] < i) continue;
1044: val[nz++] = av[j];
1045: }
1046: } else {
1047: rnz = ai[i + 1] - adiag[i];
1048: v1 = av + adiag[i];
1049: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1050: }
1051: }
1052: } else {
1053: for (i = 0; i < M; i++) {
1054: rnz = ai[i + 1] - adiag[i];
1055: v1 = av + adiag[i];
1056: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1057: }
1058: }
1059: }
1060: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1061: PetscFunctionReturn(PETSC_SUCCESS);
1062: }
1064: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1065: {
1066: const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1067: PetscInt bs;
1068: PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB;
1069: PetscMUMPSInt *row, *col;
1070: const PetscScalar *av, *bv, *v1, *v2;
1071: PetscScalar *val;
1072: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
1073: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data;
1074: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1075: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
1076: #if defined(PETSC_USE_COMPLEX)
1077: PetscBool hermitian, isset;
1078: #endif
1080: PetscFunctionBegin;
1081: #if defined(PETSC_USE_COMPLEX)
1082: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1083: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1084: #endif
1085: PetscCall(MatGetBlockSize(A, &bs));
1086: rstart = A->rmap->rstart;
1087: ai = aa->i;
1088: aj = aa->j;
1089: bi = bb->i;
1090: bj = bb->j;
1091: av = aa->a;
1092: bv = bb->a;
1094: garray = mat->garray;
1096: if (reuse == MAT_INITIAL_MATRIX) {
1097: nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1098: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1099: PetscCall(PetscMalloc1(nz, &val));
1100: /* can not decide the exact mumps->nnz now because of the SBAIJ */
1101: mumps->irn = row;
1102: mumps->jcn = col;
1103: mumps->val = mumps->val_alloc = val;
1104: } else {
1105: val = mumps->val;
1106: }
1108: jj = 0;
1109: irow = rstart;
1110: for (i = 0; i < mbs; i++) {
1111: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1112: countA = ai[i + 1] - ai[i];
1113: countB = bi[i + 1] - bi[i];
1114: bjj = bj + bi[i];
1115: v1 = av + ai[i] * bs2;
1116: v2 = bv + bi[i] * bs2;
1118: if (bs > 1) {
1119: /* A-part */
1120: for (j = 0; j < countA; j++) {
1121: for (k = 0; k < bs; k++) {
1122: for (m = 0; m < bs; m++) {
1123: if (rstart + ajj[j] * bs > irow || k >= m) {
1124: if (reuse == MAT_INITIAL_MATRIX) {
1125: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1126: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1127: }
1128: val[jj++] = v1[j * bs2 + m + k * bs];
1129: }
1130: }
1131: }
1132: }
1134: /* B-part */
1135: for (j = 0; j < countB; j++) {
1136: for (k = 0; k < bs; k++) {
1137: for (m = 0; m < bs; m++) {
1138: if (reuse == MAT_INITIAL_MATRIX) {
1139: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1140: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1141: }
1142: val[jj++] = v2[j * bs2 + m + k * bs];
1143: }
1144: }
1145: }
1146: } else {
1147: /* A-part */
1148: for (j = 0; j < countA; j++) {
1149: if (reuse == MAT_INITIAL_MATRIX) {
1150: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1151: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1152: }
1153: val[jj++] = v1[j];
1154: }
1156: /* B-part */
1157: for (j = 0; j < countB; j++) {
1158: if (reuse == MAT_INITIAL_MATRIX) {
1159: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1160: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1161: }
1162: val[jj++] = v2[j];
1163: }
1164: }
1165: irow += bs;
1166: }
1167: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1168: PetscFunctionReturn(PETSC_SUCCESS);
1169: }
1171: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1172: {
1173: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1174: PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB;
1175: PetscMUMPSInt *row, *col;
1176: const PetscScalar *av, *bv, *v1, *v2;
1177: PetscScalar *val;
1178: Mat Ad, Ao;
1179: Mat_SeqAIJ *aa;
1180: Mat_SeqAIJ *bb;
1182: PetscFunctionBegin;
1183: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1184: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1185: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1187: aa = (Mat_SeqAIJ *)Ad->data;
1188: bb = (Mat_SeqAIJ *)Ao->data;
1189: ai = aa->i;
1190: aj = aa->j;
1191: bi = bb->i;
1192: bj = bb->j;
1194: rstart = A->rmap->rstart;
1195: cstart = A->cmap->rstart;
1197: if (reuse == MAT_INITIAL_MATRIX) {
1198: nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1199: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1200: PetscCall(PetscMalloc1(nz, &val));
1201: mumps->nnz = nz;
1202: mumps->irn = row;
1203: mumps->jcn = col;
1204: mumps->val = mumps->val_alloc = val;
1205: } else {
1206: val = mumps->val;
1207: }
1209: jj = 0;
1210: irow = rstart;
1211: for (i = 0; i < m; i++) {
1212: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1213: countA = ai[i + 1] - ai[i];
1214: countB = bi[i + 1] - bi[i];
1215: bjj = bj + bi[i];
1216: v1 = av + ai[i];
1217: v2 = bv + bi[i];
1219: /* A-part */
1220: for (j = 0; j < countA; j++) {
1221: if (reuse == MAT_INITIAL_MATRIX) {
1222: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1223: PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1224: }
1225: val[jj++] = v1[j];
1226: }
1228: /* B-part */
1229: for (j = 0; j < countB; j++) {
1230: if (reuse == MAT_INITIAL_MATRIX) {
1231: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1232: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1233: }
1234: val[jj++] = v2[j];
1235: }
1236: irow++;
1237: }
1238: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1239: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1240: PetscFunctionReturn(PETSC_SUCCESS);
1241: }
1243: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1244: {
1245: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
1246: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data;
1247: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1248: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1249: const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1250: const PetscInt bs2 = mat->bs2;
1251: PetscInt bs;
1252: PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx;
1253: PetscMUMPSInt *row, *col;
1254: const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1255: PetscScalar *val;
1257: PetscFunctionBegin;
1258: PetscCall(MatGetBlockSize(A, &bs));
1259: if (reuse == MAT_INITIAL_MATRIX) {
1260: nz = bs2 * (aa->nz + bb->nz);
1261: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1262: PetscCall(PetscMalloc1(nz, &val));
1263: mumps->nnz = nz;
1264: mumps->irn = row;
1265: mumps->jcn = col;
1266: mumps->val = mumps->val_alloc = val;
1267: } else {
1268: val = mumps->val;
1269: }
1271: jj = 0;
1272: irow = rstart;
1273: for (i = 0; i < mbs; i++) {
1274: countA = ai[i + 1] - ai[i];
1275: countB = bi[i + 1] - bi[i];
1276: ajj = aj + ai[i];
1277: bjj = bj + bi[i];
1278: v1 = av + bs2 * ai[i];
1279: v2 = bv + bs2 * bi[i];
1281: idx = 0;
1282: /* A-part */
1283: for (k = 0; k < countA; k++) {
1284: for (j = 0; j < bs; j++) {
1285: for (n = 0; n < bs; n++) {
1286: if (reuse == MAT_INITIAL_MATRIX) {
1287: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1288: PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1289: }
1290: val[jj++] = v1[idx++];
1291: }
1292: }
1293: }
1295: idx = 0;
1296: /* B-part */
1297: for (k = 0; k < countB; k++) {
1298: for (j = 0; j < bs; j++) {
1299: for (n = 0; n < bs; n++) {
1300: if (reuse == MAT_INITIAL_MATRIX) {
1301: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1302: PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1303: }
1304: val[jj++] = v2[idx++];
1305: }
1306: }
1307: }
1308: irow += bs;
1309: }
1310: PetscFunctionReturn(PETSC_SUCCESS);
1311: }
1313: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1314: {
1315: const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1316: PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1317: PetscMUMPSInt *row, *col;
1318: const PetscScalar *av, *bv, *v1, *v2;
1319: PetscScalar *val;
1320: Mat Ad, Ao;
1321: Mat_SeqAIJ *aa;
1322: Mat_SeqAIJ *bb;
1323: #if defined(PETSC_USE_COMPLEX)
1324: PetscBool hermitian, isset;
1325: #endif
1327: PetscFunctionBegin;
1328: #if defined(PETSC_USE_COMPLEX)
1329: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1330: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1331: #endif
1332: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1333: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1334: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1336: aa = (Mat_SeqAIJ *)Ad->data;
1337: bb = (Mat_SeqAIJ *)Ao->data;
1338: ai = aa->i;
1339: aj = aa->j;
1340: bi = bb->i;
1341: bj = bb->j;
1342: PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1343: rstart = A->rmap->rstart;
1345: if (reuse == MAT_INITIAL_MATRIX) {
1346: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1347: nzb = 0; /* num of upper triangular entries in mat->B */
1348: for (i = 0; i < m; i++) {
1349: nza += (ai[i + 1] - adiag[i]);
1350: countB = bi[i + 1] - bi[i];
1351: bjj = bj + bi[i];
1352: for (j = 0; j < countB; j++) {
1353: if (garray[bjj[j]] > rstart) nzb++;
1354: }
1355: }
1357: nz = nza + nzb; /* total nz of upper triangular part of mat */
1358: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1359: PetscCall(PetscMalloc1(nz, &val));
1360: mumps->nnz = nz;
1361: mumps->irn = row;
1362: mumps->jcn = col;
1363: mumps->val = mumps->val_alloc = val;
1364: } else {
1365: val = mumps->val;
1366: }
1368: jj = 0;
1369: irow = rstart;
1370: for (i = 0; i < m; i++) {
1371: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1372: v1 = av + adiag[i];
1373: countA = ai[i + 1] - adiag[i];
1374: countB = bi[i + 1] - bi[i];
1375: bjj = bj + bi[i];
1376: v2 = bv + bi[i];
1378: /* A-part */
1379: for (j = 0; j < countA; j++) {
1380: if (reuse == MAT_INITIAL_MATRIX) {
1381: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1382: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1383: }
1384: val[jj++] = v1[j];
1385: }
1387: /* B-part */
1388: for (j = 0; j < countB; j++) {
1389: if (garray[bjj[j]] > rstart) {
1390: if (reuse == MAT_INITIAL_MATRIX) {
1391: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1392: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1393: }
1394: val[jj++] = v2[j];
1395: }
1396: }
1397: irow++;
1398: }
1399: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1400: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1401: PetscFunctionReturn(PETSC_SUCCESS);
1402: }
1404: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1405: {
1406: const PetscScalar *av;
1407: const PetscInt M = A->rmap->n;
1408: PetscCount i;
1409: PetscMUMPSInt *row, *col;
1410: Vec v;
1412: PetscFunctionBegin;
1413: PetscCall(MatDiagonalGetDiagonal(A, &v));
1414: PetscCall(VecGetArrayRead(v, &av));
1415: if (reuse == MAT_INITIAL_MATRIX) {
1416: PetscCall(PetscMalloc2(M, &row, M, &col));
1417: for (i = 0; i < M; i++) {
1418: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1419: col[i] = row[i];
1420: }
1421: mumps->val = (PetscScalar *)av;
1422: mumps->irn = row;
1423: mumps->jcn = col;
1424: mumps->nnz = M;
1425: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1426: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1427: PetscCall(VecRestoreArrayRead(v, &av));
1428: PetscFunctionReturn(PETSC_SUCCESS);
1429: }
1431: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1432: {
1433: PetscScalar *v;
1434: const PetscInt m = A->rmap->n, N = A->cmap->N;
1435: PetscInt lda;
1436: PetscCount i, j;
1437: PetscMUMPSInt *row, *col;
1439: PetscFunctionBegin;
1440: PetscCall(MatDenseGetArray(A, &v));
1441: PetscCall(MatDenseGetLDA(A, &lda));
1442: if (reuse == MAT_INITIAL_MATRIX) {
1443: PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1444: for (i = 0; i < m; i++) {
1445: col[i] = 0;
1446: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1447: }
1448: for (j = 1; j < N; j++) {
1449: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1450: PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1451: }
1452: if (lda == m) mumps->val = v;
1453: else {
1454: PetscCall(PetscMalloc1(m * N, &mumps->val));
1455: mumps->val_alloc = mumps->val;
1456: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1457: }
1458: mumps->irn = row;
1459: mumps->jcn = col;
1460: mumps->nnz = m * N;
1461: } else {
1462: if (lda == m && !mumps->nest_vals) mumps->val = v;
1463: else {
1464: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1465: }
1466: }
1467: PetscCall(MatDenseRestoreArray(A, &v));
1468: PetscFunctionReturn(PETSC_SUCCESS);
1469: }
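/* Illustration of the layout built above (sizes are hypothetical): a process owning 2 local rows starting
   at rstart = 4 of a dense matrix with N = 3 global columns emits the m*N = 6 (row, col) pairs
     (4,0) (5,0) (4,1) (5,1) (4,2) (5,2)
   i.e. column-major over the local block, so val can point straight at the MatDense array when lda == m;
   otherwise the columns are compacted into a freshly allocated buffer. */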
1471: // If the input Mat (sub) is a MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function repeatedly retrieves the parent Mat until that parent is no
1472: // longer a MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself, and returns the accumulated shift and scaling, whether the parent Mat should be conjugated,
1473: // and whether its rows and columns should be swapped
1474: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
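/* Sketch of how the accumulation below composes (values are illustrative): if sub = beta*I + alpha*B^T
   with B = delta*I + gamma*A0^T, the loop unwraps the two MATTRANSPOSEVIRTUAL layers and leaves
     *sub = A0, *vshift = beta + alpha*delta, *vscale = alpha*gamma, *swap = PETSC_FALSE (toggled twice),
   so that the original operator equals (*vscale)*A0 + (*vshift)*I. For MATHERMITIANTRANSPOSEVIRTUAL
   layers the shift/scale contributions are conjugated and *conjugate is toggled as well. */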
1475: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1476: {
1477: Mat A;
1478: PetscScalar s[2];
1479: PetscBool isTrans, isHTrans, compare;
1481: PetscFunctionBegin;
1482: do {
1483: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1484: if (isTrans) {
1485: PetscCall(MatTransposeGetMat(*sub, &A));
1486: isHTrans = PETSC_FALSE;
1487: } else {
1488: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1489: if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1490: }
1491: compare = (PetscBool)(isTrans || isHTrans);
1492: if (compare) {
1493: if (vshift && vscale) {
1494: PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1495: if (!*conjugate) {
1496: *vshift += s[0] * *vscale;
1497: *vscale *= s[1];
1498: } else {
1499: *vshift += PetscConj(s[0]) * *vscale;
1500: *vscale *= PetscConj(s[1]);
1501: }
1502: }
1503: if (swap) *swap = (PetscBool)!*swap;
1504: if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1505: *sub = A;
1506: }
1507: } while (compare);
1508: PetscFunctionReturn(PETSC_SUCCESS);
1509: }
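/* Minimal usage sketch for the MATNEST path handled below (block names, sizes and the vectors are placeholders):

     Mat           blocks[4] = {A00, A01, A10, A11}, N, F;
     Vec           b, x;
     MatFactorInfo info;

     PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &N));
     PetscCall(MatGetFactor(N, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, N, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, N, &info));
     PetscCall(MatSolve(F, b, x));

   MatConvertToTriples_nest_xaij() is what assembles the per-block COO triples into the single global
   (irn, jcn, val) arrays consumed by MUMPS. */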
1511: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1512: {
1513: Mat **mats;
1514: PetscInt nr, nc;
1515: PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1517: PetscFunctionBegin;
1518: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1519: if (reuse == MAT_INITIAL_MATRIX) {
1520: PetscMUMPSInt *irns, *jcns;
1521: PetscScalar *vals;
1522: PetscCount totnnz, cumnnz, maxnnz;
1523: PetscInt *pjcns_w, Mbs = 0;
1524: IS *rows, *cols;
1525: PetscInt **rows_idx, **cols_idx;
1527: cumnnz = 0;
1528: maxnnz = 0;
1529: PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1530: for (PetscInt r = 0; r < nr; r++) {
1531: for (PetscInt c = 0; c < nc; c++) {
1532: Mat sub = mats[r][c];
1534: mumps->nest_convert_to_triples[r * nc + c] = NULL;
1535: if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1536: if (sub) {
1537: PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1538: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1539: MatInfo info;
1541: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1542: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1543: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1544: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1545: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1546: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1547: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1548: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1549: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1551: if (chol) {
1552: if (r == c) {
1553: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1554: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1555: else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1556: else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1557: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1558: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1559: } else {
1560: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1561: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1562: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1563: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1564: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1565: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1566: }
1567: } else {
1568: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1569: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1570: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1571: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1572: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1573: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1574: }
1575: PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1576: mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1577: PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1578: cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1579: maxnnz = PetscMax(maxnnz, info.nz_used);
1580: }
1581: }
1582: }
1584: /* Allocate total COO */
1585: totnnz = cumnnz;
1586: PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1587: PetscCall(PetscMalloc1(totnnz, &vals));
1589: /* Handle row and column maps
1590: We directly map rows and use an SF for the columns */
1591: PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1592: PetscCall(MatNestGetISs(A, rows, cols));
1593: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1594: for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1595: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1596: else (void)maxnnz;
1598: cumnnz = 0;
1599: for (PetscInt r = 0; r < nr; r++) {
1600: for (PetscInt c = 0; c < nc; c++) {
1601: Mat sub = mats[r][c];
1602: const PetscInt *ridx = rows_idx[r];
1603: const PetscInt *cidx = cols_idx[c];
1604: PetscScalar vscale = 1.0, vshift = 0.0;
1605: PetscInt rst, size, bs;
1606: PetscSF csf;
1607: PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1608: PetscLayout cmap;
1609: PetscInt innz;
1611: mumps->nest_vals_start[r * nc + c] = cumnnz;
1612: if (c == r) {
1613: PetscCall(ISGetSize(rows[r], &size));
1614: if (!mumps->nest_convert_to_triples[r * nc + c]) {
1615: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1616: }
1617: PetscCall(MatGetBlockSize(sub, &bs));
1618: Mbs += size / bs;
1619: }
1620: if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1622: /* Extract inner blocks if needed */
1623: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1624: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1626: /* Get column layout to map off-process columns */
1627: PetscCall(MatGetLayouts(sub, NULL, &cmap));
1629: /* Get row start to map on-process rows */
1630: PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1632: /* Directly reuse the Mat_MUMPS data structure as scratch for this block, with C (0-based) ordering for now */
1633: PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1635: /* Swap the roles of the row and column indices for transposed blocks,
1636: since we need the values in the final global ordering */
1637: if (swap) {
1638: cidx = rows_idx[r];
1639: ridx = cols_idx[c];
1640: }
1642: /* Communicate column indices.
1643: This could have been done with a single SF, but that would have complicated the code a lot;
1644: since we do it only once, we pay the price of setting up an SF for each block */
1645: if (PetscDefined(USE_64BIT_INDICES)) {
1646: for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1647: } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence compiler warnings in 64-bit PetscInt builds (where this branch is never executed) */
1648: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1649: PetscCall(PetscIntCast(mumps->nnz, &innz));
1650: PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1651: PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1652: PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653: PetscCall(PetscSFDestroy(&csf));
1655: /* Import indices: use direct map for rows and mapped indices for columns */
1656: if (swap) {
1657: for (PetscInt k = 0; k < mumps->nnz; k++) {
1658: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1659: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1660: }
1661: } else {
1662: for (PetscInt k = 0; k < mumps->nnz; k++) {
1663: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1664: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1665: }
1666: }
1668: /* Import values to full COO */
1669: if (conjugate) { /* conjugate the entries */
1670: PetscScalar *v = vals + cumnnz;
1671: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1672: } else if (vscale != 1.0) {
1673: PetscScalar *v = vals + cumnnz;
1674: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1675: } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1677: /* Shift new starting point and sanity check */
1678: cumnnz += mumps->nnz;
1679: PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1681: /* Free scratch memory */
1682: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1683: PetscCall(PetscFree(mumps->val_alloc));
1684: mumps->val = NULL;
1685: mumps->nnz = 0;
1686: }
1687: }
1688: if (mumps->id.ICNTL(15) == 1) {
1689: if (Mbs != A->rmap->N) {
1690: PetscMPIInt rank, size;
1692: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1693: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1694: if (rank == 0) {
1695: PetscInt shift = 0;
1697: PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1698: PetscCall(PetscFree(mumps->id.blkptr));
1699: PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1700: mumps->id.blkptr[0] = 1;
1701: for (PetscInt i = 0; i < size; ++i) {
1702: for (PetscInt r = 0; r < nr; r++) {
1703: Mat sub = mats[r][r];
1704: const PetscInt *ranges;
1705: PetscInt bs;
1707: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1708: PetscCall(MatGetOwnershipRanges(sub, &ranges));
1709: PetscCall(MatGetBlockSize(sub, &bs));
1710: for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1711: shift += (ranges[i + 1] - ranges[i]) / bs;
1712: }
1713: }
1714: }
1715: } else mumps->id.ICNTL(15) = 0;
1716: }
1717: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1718: for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1719: for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1720: PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1721: if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1722: mumps->nest_vals_start[nr * nc] = cumnnz;
1724: /* Set pointers for final MUMPS data structure */
1725: mumps->nest_vals = vals;
1726: mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1727: mumps->val = vals;
1728: mumps->irn = irns;
1729: mumps->jcn = jcns;
1730: mumps->nnz = cumnnz;
1731: } else {
1732: PetscScalar *oval = mumps->nest_vals;
1733: for (PetscInt r = 0; r < nr; r++) {
1734: for (PetscInt c = 0; c < nc; c++) {
1735: PetscBool conjugate = PETSC_FALSE;
1736: Mat sub = mats[r][c];
1737: PetscScalar vscale = 1.0, vshift = 0.0;
1738: PetscInt midx = r * nc + c;
1740: if (!mumps->nest_convert_to_triples[midx]) continue;
1741: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1742: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1743: mumps->val = oval + mumps->nest_vals_start[midx];
1744: PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1745: if (conjugate) {
1746: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1747: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1748: } else if (vscale != 1.0) {
1749: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1750: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1751: }
1752: }
1753: }
1754: mumps->val = oval;
1755: }
1756: PetscFunctionReturn(PETSC_SUCCESS);
1757: }
1759: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1760: {
1761: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1763: PetscFunctionBegin;
1764: PetscCall(PetscFree(mumps->id.isol_loc));
1765: PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1766: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1767: PetscCall(VecDestroy(&mumps->b_seq));
1768: PetscCall(VecDestroy(&mumps->x_seq));
1769: PetscCall(PetscFree(mumps->id.perm_in));
1770: PetscCall(PetscFree(mumps->id.blkvar));
1771: PetscCall(PetscFree(mumps->id.blkptr));
1772: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1773: PetscCall(PetscFree(mumps->val_alloc));
1774: PetscCall(PetscFree(mumps->info));
1775: PetscCall(PetscFree(mumps->ICNTL_pre));
1776: PetscCall(PetscFree(mumps->CNTL_pre));
1777: PetscCall(MatMumpsResetSchur_Private(mumps));
1778: if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1779: mumps->id.job = JOB_END;
1780: PetscMUMPS_c(mumps);
1781: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1782: if (mumps->mumps_comm != MPI_COMM_NULL) {
1783: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1784: else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1785: }
1786: }
1787: PetscCall(MatMumpsFreeInternalID(&mumps->id));
1788: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1789: if (mumps->use_petsc_omp_support) {
1790: PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1791: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1792: PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1793: }
1794: #endif
1795: PetscCall(PetscFree(mumps->ia_alloc));
1796: PetscCall(PetscFree(mumps->ja_alloc));
1797: PetscCall(PetscFree(mumps->recvcount));
1798: PetscCall(PetscFree(mumps->reqs));
1799: PetscCall(PetscFree(mumps->irhs_loc));
1800: PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1801: PetscCall(PetscFree(mumps->nest_vals));
1802: PetscCall(PetscFree(A->data));
1804: /* clear composed functions */
1805: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1806: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1807: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1808: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1809: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1810: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1811: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1812: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1813: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1814: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1815: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1816: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1817: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1818: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1819: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1820: PetscFunctionReturn(PETSC_SUCCESS);
1821: }
1823: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
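/* Example of the mapping built below in the common single-thread-group case: a process owning rows
   [rstart, rstart + m) of A fills irhs_loc = {rstart + 1, ..., rstart + m} (MUMPS expects 1-based,
   Fortran-style indices) and exposes its m*nrhs local right-hand-side entries through rhs_loc. */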
1824: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1825: {
1826: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1827: const PetscMPIInt ompsize = mumps->omp_comm_size;
1828: PetscInt i, m, M, rstart;
1830: PetscFunctionBegin;
1831: PetscCall(MatGetSize(A, &M, NULL));
1832: PetscCall(MatGetLocalSize(A, &m, NULL));
1833: PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1834: if (ompsize == 1) {
1835: if (!mumps->irhs_loc) {
1836: mumps->nloc_rhs = (PetscMUMPSInt)m;
1837: PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1838: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1839: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1840: }
1841: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1842: } else {
1843: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1844: const PetscInt *ranges;
1845: PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks;
1846: MPI_Group petsc_group, omp_group;
1847: PetscScalar *recvbuf = NULL;
1849: if (mumps->is_omp_master) {
1850: /* Lazily initialize the omp stuff for distributed rhs */
1851: if (!mumps->irhs_loc) {
1852: PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1853: PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1854: PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1855: PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1856: for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1857: PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1859: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1860: mumps->nloc_rhs = 0;
1861: PetscCall(MatGetOwnershipRanges(A, &ranges));
1862: for (j = 0; j < ompsize; j++) {
1863: mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1864: mumps->nloc_rhs += mumps->rhs_nrow[j];
1865: }
1866: PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1867: for (j = k = 0; j < ompsize; j++) {
1868: for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1869: }
1871: PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1872: PetscCallMPI(MPI_Group_free(&petsc_group));
1873: PetscCallMPI(MPI_Group_free(&omp_group));
1874: }
1876: /* Reallocate buffers when the current nrhs is larger than any previously encountered */
1877: if (nrhs > mumps->max_nrhs) {
1878: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1879: PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1880: mumps->max_nrhs = nrhs;
1881: }
1883: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1884: for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1885: mumps->rhs_disps[0] = 0;
1886: for (j = 1; j < ompsize; j++) {
1887: mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1888: PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1889: }
1890: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1891: }
1893: PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1894: PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1896: if (mumps->is_omp_master) {
1897: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1898: PetscScalar *dst, *dstbase = mumps->rhs_loc;
1899: for (j = 0; j < ompsize; j++) {
1900: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1901: dst = dstbase;
1902: for (i = 0; i < nrhs; i++) {
1903: PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1904: src += mumps->rhs_nrow[j];
1905: dst += mumps->nloc_rhs;
1906: }
1907: dstbase += mumps->rhs_nrow[j];
1908: }
1909: }
1910: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1911: }
1912: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1913: }
1914: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
1915: mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1916: mumps->id.lrhs_loc = mumps->nloc_rhs;
1917: mumps->id.irhs_loc = mumps->irhs_loc;
1918: PetscFunctionReturn(PETSC_SUCCESS);
1919: }
1921: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1922: {
1923: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1924: const PetscScalar *barray = NULL;
1925: PetscScalar *array;
1926: IS is_iden, is_petsc;
1927: PetscBool second_solve = PETSC_FALSE;
1928: static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1930: PetscFunctionBegin;
1931: PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
1932: "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
1933: &cite1));
1934: PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
1935: "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
1936: &cite2));
1938: PetscCall(VecFlag(x, A->factorerrortype));
1939: if (A->factorerrortype) {
1940: PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1941: PetscFunctionReturn(PETSC_SUCCESS);
1942: }
1944: mumps->id.nrhs = 1;
1945: if (mumps->petsc_size > 1) {
1946: if (mumps->ICNTL20 == 10) {
1947: mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1948: PetscCall(VecGetArrayRead(b, &barray));
1949: PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1950: } else {
1951: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
1952: PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1953: PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1954: if (!mumps->myid) {
1955: PetscCall(VecGetArray(mumps->b_seq, &array));
1956: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1957: }
1958: }
1959: } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1960: mumps->id.ICNTL(20) = 0;
1961: PetscCall(VecCopy(b, x));
1962: PetscCall(VecGetArray(x, &array));
1963: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1964: }
1966: /*
1967: handle condensation step of Schur complement (if any)
1968: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1969: According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1970: Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1971: This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1972: */
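/* Hedged usage sketch of the Schur path below (F, is_schur, b, x and info are placeholders):

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is_schur)); // variables retained in the Schur complement
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x)); // still returns the solution of the full system by default

   Unless the user selects another value through -mat_mumps_icntl_26, the code below performs the
   condensation solve, solves the Schur system internally, and expands back to the full solution. */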
1973: if (mumps->id.size_schur > 0) {
1974: PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1975: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1976: second_solve = PETSC_TRUE;
1977: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1978: mumps->id.ICNTL(26) = 1; /* condensation phase */
1979: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1980: }
1982: mumps->id.job = JOB_SOLVE;
1983: PetscMUMPS_c(mumps); // solve; when ICNTL(26) == 1 this is the condensation step that puts the reduced RHS in id.redrhs
1984: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1986: /* handle expansion step of Schur complement (if any) */
1987: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1988: else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS into id.redrhs; solve the Schur system and copy its solution back
1989: PetscCall(MatMumpsSolveSchur_Private(A));
1990: for (PetscInt i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1991: }
1993: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1994: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1995: /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1996: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1997: }
1998: if (!mumps->scat_sol) { /* create scatter scat_sol */
1999: PetscInt *isol2_loc = NULL;
2000: PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2001: PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2002: for (PetscInt i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
2003: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2004: PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2005: PetscCall(ISDestroy(&is_iden));
2006: PetscCall(ISDestroy(&is_petsc));
2007: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2008: }
2010: PetscScalar *xarray;
2011: PetscCall(VecGetArray(mumps->x_seq, &xarray));
2012: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2013: PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2014: PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2015: PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017: if (mumps->ICNTL20 == 10) { // distributed RHS
2018: PetscCall(VecRestoreArrayRead(b, &barray));
2019: } else if (!mumps->myid) { // centralized RHS
2020: PetscCall(VecRestoreArray(mumps->b_seq, &array));
2021: }
2022: } else {
2023: // id.rhs has the solution in mumps precision
2024: PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2025: PetscCall(VecRestoreArray(x, &array));
2026: }
2028: PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2029: PetscFunctionReturn(PETSC_SUCCESS);
2030: }
2032: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2033: {
2034: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2035: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2037: PetscFunctionBegin;
2038: mumps->id.ICNTL(9) = 0;
2039: PetscCall(MatSolve_MUMPS(A, b, x));
2040: mumps->id.ICNTL(9) = value;
2041: PetscFunctionReturn(PETSC_SUCCESS);
2042: }
2044: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2045: {
2046: Mat Bt = NULL;
2047: PetscBool denseX, denseB, flg, flgT;
2048: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2049: PetscInt i, nrhs, M, nrhsM;
2050: PetscScalar *array;
2051: const PetscScalar *barray;
2052: PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0;
2053: PetscMUMPSInt *isol_loc, *isol_loc_save;
2054: PetscScalar *sol_loc;
2055: void *sol_loc_save;
2056: PetscCount sol_loc_len_save;
2057: IS is_to, is_from;
2058: PetscInt k, proc, j, m, myrstart;
2059: const PetscInt *rstart;
2060: Vec v_mpi, msol_loc;
2061: VecScatter scat_sol;
2062: Vec b_seq;
2063: VecScatter scat_rhs;
2064: PetscScalar *aa;
2065: PetscInt spnr, *ia, *ja;
2066: Mat_MPIAIJ *b = NULL;
2068: PetscFunctionBegin;
2069: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2070: PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2072: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2074: if (denseB) {
2075: PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2076: mumps->id.ICNTL(20) = 0; /* dense RHS */
2077: } else { /* sparse B */
2078: PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2079: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2080: PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2081: PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2082: /* input B is transpose of actual RHS matrix,
2083: because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2084: PetscCall(MatTransposeGetMat(B, &Bt));
2085: mumps->id.ICNTL(20) = 1; /* sparse RHS */
2086: }
2088: PetscCall(MatGetSize(B, &M, &nrhs));
2089: PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2090: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2091: mumps->id.lrhs = (PetscMUMPSInt)M;
2093: if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2094: PetscScalar *aa;
2095: PetscInt spnr, *ia, *ja;
2096: PetscBool second_solve = PETSC_FALSE;
2098: PetscCall(MatDenseGetArray(X, &array));
2099: if (denseB) {
2100: /* copy B to X */
2101: PetscCall(MatDenseGetArrayRead(B, &barray));
2102: PetscCall(PetscArraycpy(array, barray, nrhsM));
2103: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2104: } else { /* sparse B */
2105: PetscCall(MatSeqAIJGetArray(Bt, &aa));
2106: PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2107: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2108: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2109: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2110: }
2111: PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2113: /* handle condensation step of Schur complement (if any) */
2114: if (mumps->id.size_schur > 0) {
2115: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2116: second_solve = PETSC_TRUE;
2117: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2118: mumps->id.ICNTL(26) = 1; /* condensation phase, i.e., compute the reduced RHS id.redrhs */
2119: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2120: }
2122: mumps->id.job = JOB_SOLVE;
2123: PetscMUMPS_c(mumps);
2124: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2126: /* handle expansion step of Schur complement (if any) */
2127: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2128: else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS into id.redrhs; solve the Schur system and copy its solution back
2129: PetscCall(MatMumpsSolveSchur_Private(A));
2130: for (j = 0; j < nrhs; ++j)
2131: for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2132: }
2134: if (!denseB) { /* sparse B, restore ia, ja */
2135: PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2136: PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2137: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2138: }
2140: // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2141: PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2142: PetscCall(MatDenseRestoreArray(X, &array));
2143: PetscFunctionReturn(PETSC_SUCCESS);
2144: }
2146: /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2147: PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2149: /* create msol_loc to hold mumps local solution */
2150: isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2151: sol_loc_save = mumps->id.sol_loc;
2152: sol_loc_len_save = mumps->id.sol_loc_len;
2153: mumps->id.isol_loc = NULL; // an init state
2154: mumps->id.sol_loc = NULL;
2155: mumps->id.sol_loc_len = 0;
2157: lsol_loc = mumps->id.lsol_loc;
2158: PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2159: PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2160: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2161: mumps->id.isol_loc = isol_loc;
2163: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2165: if (denseB) {
2166: if (mumps->ICNTL20 == 10) {
2167: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2168: PetscCall(MatDenseGetArrayRead(B, &barray));
2169: PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2170: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2171: PetscCall(MatGetLocalSize(B, &m, NULL));
2172: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2173: } else {
2174: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2175: /* TODO: Because the indices are non-contiguous, the vecscatter scat_rhs created below cannot be realized as an MPI_Gather, which results in
2176: very inefficient communication. An optimization would be to use VecScatterCreateToZero() to gather B onto rank 0, and then, on rank
2177: 0, rearrange B into the desired order, which is a purely local operation.
2178: */
2180: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2181: /* wrap dense rhs matrix B into a vector v_mpi */
2182: PetscCall(MatGetLocalSize(B, &m, NULL));
2183: PetscCall(MatDenseGetArrayRead(B, &barray));
2184: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2185: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2187: /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2188: if (!mumps->myid) {
2189: PetscInt *idx;
2190: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2191: PetscCall(PetscMalloc1(nrhsM, &idx));
2192: PetscCall(MatGetOwnershipRanges(B, &rstart));
2193: for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2194: for (j = 0; j < nrhs; j++) {
2195: for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2196: }
2197: }
2199: PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2200: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2201: PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2202: } else {
2203: PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2204: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2205: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2206: }
2208: PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2209: PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2210: PetscCall(ISDestroy(&is_to));
2211: PetscCall(ISDestroy(&is_from));
2212: PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2214: if (!mumps->myid) { /* define rhs on the host */
2215: PetscCall(VecGetArrayRead(b_seq, &barray));
2216: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2217: PetscCall(VecRestoreArrayRead(b_seq, &barray));
2218: }
2219: }
2220: } else { /* sparse B */
2221: b = (Mat_MPIAIJ *)Bt->data;
2223: /* wrap dense X into a vector v_mpi */
2224: PetscCall(MatGetLocalSize(X, &m, NULL));
2225: PetscCall(MatDenseGetArrayRead(X, &barray));
2226: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2227: PetscCall(MatDenseRestoreArrayRead(X, &barray));
2229: if (!mumps->myid) {
2230: PetscCall(MatSeqAIJGetArray(b->A, &aa));
2231: PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2232: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2233: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2234: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2235: } else {
2236: mumps->id.irhs_ptr = NULL;
2237: mumps->id.irhs_sparse = NULL;
2238: mumps->id.nz_rhs = 0;
2239: if (mumps->id.rhs_sparse_len) {
2240: PetscCall(PetscFree(mumps->id.rhs_sparse));
2241: mumps->id.rhs_sparse_len = 0;
2242: }
2243: }
2244: }
2246: /* solve phase */
2247: mumps->id.job = JOB_SOLVE;
2248: PetscMUMPS_c(mumps);
2249: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2251: /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2252: PetscCall(MatDenseGetArray(X, &array));
2253: PetscCall(VecPlaceArray(v_mpi, array));
2255: /* create scatter scat_sol */
2256: PetscCall(MatGetOwnershipRanges(X, &rstart));
2257: /* iidx: index used to scatter the MUMPS solution into the PETSc matrix X */
2259: PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2260: PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2261: for (i = 0; i < lsol_loc; i++) {
2262: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2264: for (proc = 0; proc < mumps->petsc_size; proc++) {
2265: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2266: myrstart = rstart[proc];
2267: k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */
2268: iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */
2269: m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2270: break;
2271: }
2272: }
2274: for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2275: }
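/* Worked example of the map built above (illustrative sizes): with two processes owning two rows each
   (rstart = {0, 2, 4}) and nrhs = 2, v_mpi stores each process's block of X column by column. The MUMPS
   entry for global row 3 belongs to process 1 (myrstart = 2, k = 1, m = 2), so it is scattered to
   iidx = 1 + 2*2 = 5 for the first right-hand side and to 5 + 1*2 = 7 for the second one. */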
2276: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2277: PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2278: PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2279: PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2280: PetscCall(ISDestroy(&is_from));
2281: PetscCall(ISDestroy(&is_to));
2282: PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2283: PetscCall(MatDenseRestoreArray(X, &array));
2285: if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2286: mumps->id.sol_loc_len = 0;
2287: PetscCall(PetscFree(mumps->id.sol_loc));
2288: }
2290: // restore old values
2291: mumps->id.sol_loc = sol_loc_save;
2292: mumps->id.sol_loc_len = sol_loc_len_save;
2293: mumps->id.isol_loc = isol_loc_save;
2295: PetscCall(PetscFree2(sol_loc, isol_loc));
2296: PetscCall(PetscFree(idxx));
2297: PetscCall(VecDestroy(&msol_loc));
2298: PetscCall(VecDestroy(&v_mpi));
2299: if (!denseB) {
2300: if (!mumps->myid) {
2301: b = (Mat_MPIAIJ *)Bt->data;
2302: PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2303: PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2304: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2305: }
2306: } else {
2307: if (mumps->ICNTL20 == 0) {
2308: PetscCall(VecDestroy(&b_seq));
2309: PetscCall(VecScatterDestroy(&scat_rhs));
2310: }
2311: }
2312: PetscCall(VecScatterDestroy(&scat_sol));
2313: PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2314: PetscFunctionReturn(PETSC_SUCCESS);
2315: }
2317: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2318: {
2319: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2320: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2322: PetscFunctionBegin;
2323: mumps->id.ICNTL(9) = 0;
2324: PetscCall(MatMatSolve_MUMPS(A, B, X));
2325: mumps->id.ICNTL(9) = value;
2326: PetscFunctionReturn(PETSC_SUCCESS);
2327: }
2329: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2330: {
2331: PetscBool flg;
2332: Mat B;
2334: PetscFunctionBegin;
2335: PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2336: PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2338: /* Create B=Bt^T that uses Bt's data structure */
2339: PetscCall(MatCreateTranspose(Bt, &B));
2341: PetscCall(MatMatSolve_MUMPS(A, B, X));
2342: PetscCall(MatDestroy(&B));
2343: PetscFunctionReturn(PETSC_SUCCESS);
2344: }
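/* Usage sketch (Bt and X are placeholders): to solve A*X = B with a sparse right-hand side, assemble
   Bt = B^T as a (MPI)AIJ matrix and a dense X of matching size, then call

     PetscCall(MatMatTransposeSolve(F, Bt, X));

   Storing the transpose is what provides MUMPS with B in compressed-column format, since the row
   storage of Bt is exactly the column storage of B. */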
2346: #if !defined(PETSC_USE_COMPLEX)
2347: /*
2348: input:
2349: F: numeric factor
2350: output:
2351: nneg: total number of negative pivots
2352: nzero: total number of zero pivots
2353: npos: (global dimension of F) - nneg - nzero
2354: */
2355: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2356: {
2357: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2358: PetscMPIInt size;
2360: PetscFunctionBegin;
2361: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2362: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2363: PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
2365: if (nneg) *nneg = mumps->id.INFOG(12);
2366: if (nzero || npos) {
2367: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2368: if (nzero) *nzero = mumps->id.INFOG(28);
2369: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2370: }
2371: PetscFunctionReturn(PETSC_SUCCESS);
2372: }
2373: #endif
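/* Usage sketch (real builds only, as guarded above; option values follow the checks in the routine):
   after a Cholesky factorization obtained with
     -pc_type cholesky -pc_factor_mat_solver_type mumps -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1
   the inertia can be queried as

     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));

   which reports INFOG(12) negative pivots, INFOG(28) zero pivots, and the remaining count as npos. */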
2375: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2376: {
2377: PetscMPIInt nreqs;
2378: PetscMUMPSInt *irn, *jcn;
2379: PetscMPIInt count;
2380: PetscCount totnnz, remain;
2381: const PetscInt osize = mumps->omp_comm_size;
2382: PetscScalar *val;
2384: PetscFunctionBegin;
2385: if (osize > 1) {
2386: if (reuse == MAT_INITIAL_MATRIX) {
2387: /* master first gathers counts of nonzeros to receive */
2388: if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2389: PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2391: /* Then each computes number of send/recvs */
2392: if (mumps->is_omp_master) {
2393: /* Start from 1 since self communication is not done in MPI */
2394: nreqs = 0;
2395: for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2396: } else {
2397: nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2398: }
2399: PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2401: /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
2402: MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
2403: might even be a prime number > 2^31-1, we have to slice the messages. Note that omp_comm_size
2404: is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
2405: */
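/* For example (hypothetical size): with mumps->nnz = 5e9 and PETSC_MPI_INT_MAX = 2^31 - 1, a sending rank
   posts ceil(5e9 / (2^31 - 1)) = 3 MPI_Isend's per array, i.e. 9 requests for irn, jcn and val together,
   which is why reqs[] is sized with a factor of 3 above. */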
2406: nreqs = 0; /* counter for actual send/recvs */
2407: if (mumps->is_omp_master) {
2408: totnnz = 0;
2410: for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2411: PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2412: PetscCall(PetscMalloc1(totnnz, &val));
2414: /* Self communication */
2415: PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2416: PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2417: PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2419: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2420: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2421: PetscCall(PetscFree(mumps->val_alloc));
2422: mumps->nnz = totnnz;
2423: mumps->irn = irn;
2424: mumps->jcn = jcn;
2425: mumps->val = mumps->val_alloc = val;
2427: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2428: jcn += mumps->recvcount[0];
2429: val += mumps->recvcount[0];
2431: /* Remote communication */
2432: for (PetscMPIInt i = 1; i < osize; i++) {
2433: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2434: remain = mumps->recvcount[i] - count;
2435: while (count > 0) {
2436: PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2437: PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2438: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439: irn += count;
2440: jcn += count;
2441: val += count;
2442: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2443: remain -= count;
2444: }
2445: }
2446: } else {
2447: irn = mumps->irn;
2448: jcn = mumps->jcn;
2449: val = mumps->val;
2450: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2451: remain = mumps->nnz - count;
2452: while (count > 0) {
2453: PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2454: PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2455: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456: irn += count;
2457: jcn += count;
2458: val += count;
2459: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2460: remain -= count;
2461: }
2462: }
2463: } else {
2464: nreqs = 0;
2465: if (mumps->is_omp_master) {
2466: val = mumps->val + mumps->recvcount[0];
2467: for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2468: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2469: remain = mumps->recvcount[i] - count;
2470: while (count > 0) {
2471: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2472: val += count;
2473: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2474: remain -= count;
2475: }
2476: }
2477: } else {
2478: val = mumps->val;
2479: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2480: remain = mumps->nnz - count;
2481: while (count > 0) {
2482: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2483: val += count;
2484: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2485: remain -= count;
2486: }
2487: }
2488: }
2489: PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2490: mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
2491: }
2492: PetscFunctionReturn(PETSC_SUCCESS);
2493: }
2495: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2496: {
2497: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2499: PetscFunctionBegin;
2500: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
2501: if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2502: PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2503: PetscFunctionReturn(PETSC_SUCCESS);
2504: }
2506: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2507: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2509: /* numerical factorization phase */
2510: mumps->id.job = JOB_FACTNUMERIC;
2511: if (!mumps->id.ICNTL(18)) { /* A is centralized */
2512: if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2513: } else {
2514: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2515: }
2517: if (F->schur) {
2518: const PetscScalar *array;
2519: MUMPS_INT size = mumps->id.size_schur;
2520: PetscCall(MatDenseGetArrayRead(F->schur, &array));
2521: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2522: PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2523: }
2525: if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix)));
2526: if (A->rmap->N && A->cmap->N) PetscMUMPS_c(mumps);
2527: if (mumps->id.INFOG(1) < 0) {
2528: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2529: if (mumps->id.INFOG(1) == -10) {
2530: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2531: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2532: } else if (mumps->id.INFOG(1) == -13) {
2533: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2535: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2536: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2537: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2538: } else {
2539: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2540: F->factorerrortype = MAT_FACTOR_OTHER;
2541: }
2542: }
2543: PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2545: F->assembled = PETSC_TRUE;
2547: if (F->schur) { /* reset Schur status to unfactored */
2548: #if defined(PETSC_HAVE_CUDA)
2549: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2550: #endif
2551: PetscScalar *array;
2552: PetscCall(MatDenseGetArray(F->schur, &array));
2553: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2554: PetscCall(MatDenseRestoreArray(F->schur, &array));
2555: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2556: mumps->id.ICNTL(19) = 2;
2557: PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2558: }
2559: PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2560: }
2562: /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
2563: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2565: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2566: // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2567: // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2568: // which the solution (in SOL_loc) is available on the local processor.
2569: // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2570: // normally have the same contents for each of these calls. The only exception is the case of
2571: // unsymmetric matrices (SYM=0) when the transpose option is changed (see ICNTL(9)) and non
2572: // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2573: if (mumps->petsc_size > 1) {
2574: PetscInt lsol_loc;
2575: PetscScalar *array;
2577: /* distributed solution; Create x_seq=sol_loc for repeated use */
2578: if (mumps->x_seq) {
2579: PetscCall(VecScatterDestroy(&mumps->scat_sol));
2580: PetscCall(PetscFree(mumps->id.isol_loc));
2581: PetscCall(VecDestroy(&mumps->x_seq));
2582: }
2583: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2584: PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2585: PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2586: PetscCall(VecGetArray(mumps->x_seq, &array));
2587: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2588: PetscCall(VecRestoreArray(mumps->x_seq, &array));
2589: mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2590: }
2591: PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2592: PetscFunctionReturn(PETSC_SUCCESS);
2593: }
2595: /* Sets MUMPS options from the options database */
2596: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2597: {
2598: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2599: PetscReal cntl;
2600: PetscMUMPSInt icntl = 0, size, *listvar_schur;
2601: PetscInt info[80], i, ninfo = 80, rbs, cbs;
2602: PetscBool flg = PETSC_FALSE;
2603: PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2604: void *arr;
2606: PetscFunctionBegin;
2607: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2608: if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2609: PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2610: PetscInt nthreads = 0;
2611: PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2612: PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2613: PetscMUMPSInt nblk, *blkvar, *blkptr;
2615: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2616: PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2617: PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2619: PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2620: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2621: /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2622: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2623: if (mumps->use_petsc_omp_support) {
2624: PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2625: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2626: PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2627: PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2628: #else
2629: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2630: ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2631: #endif
2632: } else {
2633: mumps->omp_comm = PETSC_COMM_SELF;
2634: mumps->mumps_comm = mumps->petsc_comm;
2635: mumps->is_omp_master = PETSC_TRUE;
2636: }
2637: PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2638: mumps->reqs = NULL;
2639: mumps->tag = 0;
2641: if (mumps->mumps_comm != MPI_COMM_NULL) {
2642: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2643: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2644: MPI_Comm comm;
2645: PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2646: mumps->mumps_comm = comm;
2647: } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2648: }
2650: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2651: mumps->id.job = JOB_INIT;
2652: mumps->id.par = 1; /* host participates in factorization and solve */
2653: mumps->id.sym = mumps->sym;
2655: size = mumps->id.size_schur;
2656: arr = mumps->id.schur;
2657: listvar_schur = mumps->id.listvar_schur;
2658: nblk = mumps->id.nblk;
2659: blkvar = mumps->id.blkvar;
2660: blkptr = mumps->id.blkptr;
2661: if (PetscDefined(USE_DEBUG)) {
2662: for (PetscInt i = 0; i < size; i++)
2663: PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2664: A->rmap->N);
2665: }
2667: PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2668: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2669: PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed since the requested precision differs from that of PetscScalar");
2670: PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2672: PetscMUMPS_c(mumps);
2673: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2675: /* set PETSc-MUMPS default options - override MUMPS default */
2676: mumps->id.ICNTL(3) = 0;
2677: mumps->id.ICNTL(4) = 0;
2678: if (mumps->petsc_size == 1) {
2679: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2680: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
2681: } else {
2682: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2683: mumps->id.ICNTL(21) = 1; /* distributed solution */
2684: }
2685: if (nblk && blkptr) {
2686: mumps->id.ICNTL(15) = 1;
2687: mumps->id.nblk = nblk;
2688: mumps->id.blkvar = blkvar;
2689: mumps->id.blkptr = blkptr;
2690: } else mumps->id.ICNTL(15) = 0;
2692: /* restore cached ICNTL and CNTL values */
2693: for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2694: for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2696: PetscCall(PetscFree(mumps->ICNTL_pre));
2697: PetscCall(PetscFree(mumps->CNTL_pre));
2699: if (schur) {
2700: mumps->id.size_schur = size;
2701: mumps->id.schur_lld = size;
2702: mumps->id.schur = arr;
2703: mumps->id.listvar_schur = listvar_schur;
2704: if (mumps->petsc_size > 1) {
2705: PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2707: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2708: gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2709: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2710: PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements are not yet supported through PETSc");
2711: } else {
2712: if (F->factortype == MAT_FACTOR_LU) {
2713: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2714: } else {
2715: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2716: }
2717: }
2718: mumps->id.ICNTL(26) = -1;
2719: }
2721: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2722: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2723: */
2724: PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2725: PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2727: mumps->scat_rhs = NULL;
2728: mumps->scat_sol = NULL;
2729: }
2730: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2731: if (flg) mumps->id.ICNTL(1) = icntl;
2732: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2733: if (flg) mumps->id.ICNTL(2) = icntl;
2734: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2735: if (flg) mumps->id.ICNTL(3) = icntl;
2737: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2738: if (flg) mumps->id.ICNTL(4) = icntl;
2739: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
2741: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2742: if (flg) mumps->id.ICNTL(6) = icntl;
2744: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2745: if (flg) {
2746: PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2747: mumps->id.ICNTL(7) = icntl;
2748: }
2750: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2751: /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2752: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2753: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2754: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2755: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2756: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2757: PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2758: if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2759: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2760: if (flg) {
2761: if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The absolute value of -mat_mumps_icntl_15 must be a multiple of the column and row block sizes");
2762: else if (mumps->id.ICNTL(15) > 0) {
2763: const PetscInt *bsizes;
2764: PetscInt nblocks, p, *blkptr = NULL;
2765: PetscMPIInt *recvcounts, *displs, n;
2766: PetscMPIInt rank, size = 0;
2768: PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2769: flg = PETSC_TRUE;
2770: for (p = 0; p < nblocks; ++p) {
2771: if (bsizes[p] > 1) break;
2772: }
2773: if (p == nblocks) flg = PETSC_FALSE;
2774: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2775: if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2776: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2777: if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2778: PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2779: PetscCall(PetscMPIIntCast(nblocks, &n));
2780: PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2781: for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2782: PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2783: PetscCallMPI(MPI_Bcast(displs + size, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); /* displs[] is PetscMPIInt, so broadcast with MPI_INT */
2784: PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2785: if (rank == 0) {
2786: blkptr[0] = 1;
2787: for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p]; /* prefix sum over all gathered block sizes */
2788: PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2789: }
2790: PetscCall(PetscFree2(recvcounts, displs));
2791: PetscCall(PetscFree(blkptr));
2792: }
2793: }
2794: }
2795: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2796: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
2797: PetscCall(MatDestroy(&F->schur));
2798: PetscCall(MatMumpsResetSchur_Private(mumps));
2799: }
2801: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2802: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2803: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2804: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2805: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2806: In short, we could not use distributed RHS unless we were using MPICH v4.0b1 or later, or we enabled a workaround available in mumps-5.6.2+.
2807: */
2808: mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2809: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2810: mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2811: #endif
2812: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2813: PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2814: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2815: PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2816: #endif
2817: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2819: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg));
2820: if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))'
2821: if (mumps->id.ICNTL(22)) {
2822: // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by the user.
2823: // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
2824: PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL));
2825: }
2826: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2827: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2828: if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
2830: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2831: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2832: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2833: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2834: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2835: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2836: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2837: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
2838: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2839: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2840: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2841: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2842: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2843: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_40", "ICNTL(40): adaptive BLR precision feature", "None", mumps->id.ICNTL(40), &mumps->id.ICNTL(40), NULL));
2844: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_47", "ICNTL(47): single precision factorization in a double precision instance", "None", mumps->id.ICNTL(47), &mumps->id.ICNTL(47), NULL));
2845: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2846: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2847: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2848: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2850: PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2851: if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2852: PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2853: if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2854: PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2855: if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2856: PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2857: if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2858: PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2859: if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2860: PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2861: if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2863: PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2864: if (ninfo) {
2865: PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2866: PetscCall(PetscMalloc1(ninfo, &mumps->info));
2867: mumps->ninfo = ninfo;
2868: for (i = 0; i < ninfo; i++) {
2869: PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2870: mumps->info[i] = info[i];
2871: }
2872: }
2873: PetscOptionsEnd();
2874: PetscFunctionReturn(PETSC_SUCCESS);
2875: }
2877: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2878: {
2879: PetscFunctionBegin;
2880: if (mumps->id.INFOG(1) < 0) {
2881: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2882: if (mumps->id.INFOG(1) == -6) {
2883: PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2884: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2885: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2886: PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2887: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2888: } else {
2889: PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2890: F->factorerrortype = MAT_FACTOR_OTHER;
2891: }
2892: }
2893: if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2894: PetscFunctionReturn(PETSC_SUCCESS);
2895: }
2897: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2898: {
2899: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2900: Vec b;
2901: const PetscInt M = A->rmap->N;
2903: PetscFunctionBegin;
2904: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2905: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2906: PetscFunctionReturn(PETSC_SUCCESS);
2907: }
2909: /* Set MUMPS options from the options database */
2910: PetscCall(MatSetFromOptions_MUMPS(F, A));
2912: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2913: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2915: /* analysis phase */
2916: mumps->id.job = JOB_FACTSYMBOLIC;
2917: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2918: switch (mumps->id.ICNTL(18)) {
2919: case 0: /* centralized assembled matrix input */
2920: if (!mumps->myid) {
2921: mumps->id.nnz = mumps->nnz;
2922: mumps->id.irn = mumps->irn;
2923: mumps->id.jcn = mumps->jcn;
2924: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2925: if (r && mumps->id.ICNTL(7) == 7 && !F->schur) {
2926: mumps->id.ICNTL(7) = 1;
2927: if (!mumps->myid) {
2928: const PetscInt *idx;
2930: PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2931: PetscCall(ISGetIndices(r, &idx));
2932: for (PetscInt i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2933: PetscCall(ISRestoreIndices(r, &idx));
2934: }
2935: }
2936: }
2937: break;
2938: case 3: /* distributed assembled matrix input (size>1) */
2939: mumps->id.nnz_loc = mumps->nnz;
2940: mumps->id.irn_loc = mumps->irn;
2941: mumps->id.jcn_loc = mumps->jcn;
2942: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2943: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2944: PetscCall(MatCreateVecs(A, NULL, &b));
2945: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2946: PetscCall(VecDestroy(&b));
2947: }
2948: break;
2949: }
2950: if (A->rmap->N && A->cmap->N) {
2951: PetscMUMPS_c(mumps);
2952: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2953: }
2954: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2955: F->ops->solve = MatSolve_MUMPS;
2956: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2957: F->ops->matsolve = MatMatSolve_MUMPS;
2958: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2959: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2961: mumps->matstruc = SAME_NONZERO_PATTERN;
2962: PetscFunctionReturn(PETSC_SUCCESS);
2963: }
2965: /* Note the PETSc r and c permutations are ignored */
2966: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2967: {
2968: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2969: Vec b;
2970: const PetscInt M = A->rmap->N;
2972: PetscFunctionBegin;
2973: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2974: /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2975: PetscFunctionReturn(PETSC_SUCCESS);
2976: }
2978: /* Set MUMPS options from the options database */
2979: PetscCall(MatSetFromOptions_MUMPS(F, A));
2981: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2982: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2984: /* analysis phase */
2985: mumps->id.job = JOB_FACTSYMBOLIC;
2986: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2987: switch (mumps->id.ICNTL(18)) {
2988: case 0: /* centralized assembled matrix input */
2989: if (!mumps->myid) {
2990: mumps->id.nnz = mumps->nnz;
2991: mumps->id.irn = mumps->irn;
2992: mumps->id.jcn = mumps->jcn;
2993: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2994: }
2995: break;
2996: case 3: /* distributed assembled matrix input (size>1) */
2997: mumps->id.nnz_loc = mumps->nnz;
2998: mumps->id.irn_loc = mumps->irn;
2999: mumps->id.jcn_loc = mumps->jcn;
3000: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3001: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3002: PetscCall(MatCreateVecs(A, NULL, &b));
3003: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3004: PetscCall(VecDestroy(&b));
3005: }
3006: break;
3007: }
3008: if (A->rmap->N && A->cmap->N) {
3009: PetscMUMPS_c(mumps);
3010: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3011: }
3012: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
3013: F->ops->solve = MatSolve_MUMPS;
3014: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
3015: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3017: mumps->matstruc = SAME_NONZERO_PATTERN;
3018: PetscFunctionReturn(PETSC_SUCCESS);
3019: }
3021: /* Note the PETSc r permutation and factor info are ignored */
3022: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3023: {
3024: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3025: Vec b;
3026: const PetscInt M = A->rmap->N;
3028: PetscFunctionBegin;
3029: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3030: /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3031: PetscFunctionReturn(PETSC_SUCCESS);
3032: }
3034: /* Set MUMPS options from the options database */
3035: PetscCall(MatSetFromOptions_MUMPS(F, A));
3037: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3038: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3040: /* analysis phase */
3041: mumps->id.job = JOB_FACTSYMBOLIC;
3042: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3043: switch (mumps->id.ICNTL(18)) {
3044: case 0: /* centralized assembled matrix input */
3045: if (!mumps->myid) {
3046: mumps->id.nnz = mumps->nnz;
3047: mumps->id.irn = mumps->irn;
3048: mumps->id.jcn = mumps->jcn;
3049: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3050: }
3051: break;
3052: case 3: /* distributed assembled matrix input (size>1) */
3053: mumps->id.nnz_loc = mumps->nnz;
3054: mumps->id.irn_loc = mumps->irn;
3055: mumps->id.jcn_loc = mumps->jcn;
3056: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3057: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3058: PetscCall(MatCreateVecs(A, NULL, &b));
3059: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3060: PetscCall(VecDestroy(&b));
3061: }
3062: break;
3063: }
3064: if (A->rmap->N && A->cmap->N) {
3065: PetscMUMPS_c(mumps);
3066: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3067: }
3068: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3069: F->ops->solve = MatSolve_MUMPS;
3070: F->ops->solvetranspose = MatSolve_MUMPS;
3071: F->ops->matsolve = MatMatSolve_MUMPS;
3072: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
3073: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3074: #if defined(PETSC_USE_COMPLEX)
3075: F->ops->getinertia = NULL;
3076: #else
3077: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3078: #endif
3080: mumps->matstruc = SAME_NONZERO_PATTERN;
3081: PetscFunctionReturn(PETSC_SUCCESS);
3082: }
3084: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3085: {
3086: PetscBool isascii;
3087: PetscViewerFormat format;
3088: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3090: PetscFunctionBegin;
3091: /* check if matrix is mumps type */
3092: if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3094: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3095: if (isascii) {
3096: PetscCall(PetscViewerGetFormat(viewer, &format));
3097: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3098: PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3099: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3100: PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
3101: PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
3102: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
3103: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3104: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
3105: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
3106: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
3107: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
3108: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3109: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
3110: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
3111: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
3112: if (mumps->id.ICNTL(11) > 0) {
3113: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3114: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3115: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3116: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3117: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3118: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3119: }
3120: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
3121: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
3122: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3123: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
3124: /* ICNTL(16-17) not used */
3125: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
3126: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
3127: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
3128: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
3129: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
3130: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory allocated locally): %d\n", mumps->id.ICNTL(23)));
3132: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
3133: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
3134: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
3135: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
3136: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
3137: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));
3139: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
3140: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors are discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
3141: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
3142: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
3143: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
3144: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
3145: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
3146: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(40) (adaptive BLR precision feature): %d\n", mumps->id.ICNTL(40)));
3147: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(47) (single precision factorization in a double precision instance): %d\n", mumps->id.ICNTL(47)));
3148: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
3149: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3150: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3151: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));
3153: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3154: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3155: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3156: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3157: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3158: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3160: /* information local to each processor */
3161: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3162: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3163: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3164: PetscCall(PetscViewerFlush(viewer));
3165: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3166: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3167: PetscCall(PetscViewerFlush(viewer));
3168: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3169: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3170: PetscCall(PetscViewerFlush(viewer));
3172: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
3173: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3174: PetscCall(PetscViewerFlush(viewer));
3176: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
3177: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3178: PetscCall(PetscViewerFlush(viewer));
3180: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3181: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3182: PetscCall(PetscViewerFlush(viewer));
3184: if (mumps->ninfo && mumps->ninfo <= 80) {
3185: for (PetscInt i = 0; i < mumps->ninfo; i++) {
3186: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3187: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3188: PetscCall(PetscViewerFlush(viewer));
3189: }
3190: }
3191: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3192: } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3194: if (mumps->myid == 0) { /* information from the host */
3195: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3196: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3197: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3198: PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3200: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3201: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3202: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3203: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3204: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3205: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3206: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3207: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3208: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3209: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3210: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3211: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3212: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3213: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3214: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3215: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3216: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3217: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3218: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3219: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3220: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3221: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3222: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3223: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3224: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3225: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3226: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3227: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3228: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3229: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3230: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3231: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3232: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3233: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3234: }
3235: }
3236: }
3237: PetscFunctionReturn(PETSC_SUCCESS);
3238: }
3240: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3241: {
3242: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3244: PetscFunctionBegin;
3245: info->block_size = 1.0;
3246: info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3247: info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3248: info->nz_unneeded = 0.0;
3249: info->assemblies = 0.0;
3250: info->mallocs = 0.0;
3251: info->memory = 0.0;
3252: info->fill_ratio_given = 0;
3253: info->fill_ratio_needed = 0;
3254: info->factor_mallocs = 0;
3255: PetscFunctionReturn(PETSC_SUCCESS);
3256: }
3258: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3259: {
3260: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3261: const PetscInt *idxs;
3262: PetscInt size, i;
3264: PetscFunctionBegin;
3265: PetscCall(ISGetLocalSize(is, &size));
3266: /* Schur complement matrix */
3267: PetscCall(MatDestroy(&F->schur));
3268: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3269: // don't allocate mumps->id.schur[] now since its precision is not yet known
3270: PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3271: PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3272: if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3274: /* MUMPS expects Fortran style indices */
3275: PetscCall(PetscFree(mumps->id.listvar_schur));
3276: PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3277: PetscCall(ISGetIndices(is, &idxs));
3278: for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3279: PetscCall(ISRestoreIndices(is, &idxs));
3280: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3281: if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3282: else mumps->ICNTL26 = -1;
3283: PetscFunctionReturn(PETSC_SUCCESS);
3284: }
3286: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3287: {
3288: Mat St;
3289: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3290: PetscScalar *array;
3291: PetscInt i, j, N = mumps->id.size_schur;
3293: PetscFunctionBegin;
3294: PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3295: PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3296: PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3297: PetscCall(MatSetType(St, MATDENSE));
3298: PetscCall(MatSetUp(St));
3299: PetscCall(MatDenseGetArray(St, &array));
3300: if (!mumps->sym) { /* MUMPS always returns a full matrix */
3301: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3302: for (i = 0; i < N; i++) {
3303: for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3304: }
3305: } else { /* stored by columns */
3306: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3307: }
3308: } else { /* either full or lower-triangular (not packed) */
3309: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3310: for (i = 0; i < N; i++) {
3311: for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3312: }
3313: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3314: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3315: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3316: for (i = 0; i < N; i++) {
3317: for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3318: }
3319: }
3320: }
3321: PetscCall(MatDenseRestoreArray(St, &array));
3322: *S = St;
3323: PetscFunctionReturn(PETSC_SUCCESS);
3324: }
3326: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3327: {
3328: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3330: PetscFunctionBegin;
3331: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 40 || icntl == 47 || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3332: if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3333: PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3334: for (i = 0; i < nICNTL_pre; ++i)
3335: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3336: if (i == nICNTL_pre) { /* not already cached */
3337: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3338: else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3339: mumps->ICNTL_pre[0]++;
3340: }
3341: mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3342: PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3343: } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3344: PetscFunctionReturn(PETSC_SUCCESS);
3345: }
3347: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3348: {
3349: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3351: PetscFunctionBegin;
3352: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 40 || icntl == 47 || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3353: if (mumps->id.job == JOB_NULL) {
3354: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3355: *ival = 0;
3356: for (i = 0; i < nICNTL_pre; ++i) {
3357: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3358: }
3359: } else *ival = mumps->id.ICNTL(icntl);
3360: PetscFunctionReturn(PETSC_SUCCESS);
3361: }
3363: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3364: {
3365: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3367: PetscFunctionBegin;
3368: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3369: if (mumps->id.job == JOB_NULL) {
3370: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3371: for (i = 0; i < nCNTL_pre; ++i)
3372: if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3373: if (i == nCNTL_pre) {
3374: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3375: else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3376: mumps->CNTL_pre[0]++;
3377: }
3378: mumps->CNTL_pre[1 + 2 * i] = icntl;
3379: mumps->CNTL_pre[2 + 2 * i] = val;
3380: } else ID_CNTL_SET(mumps->id, icntl, val);
3381: PetscFunctionReturn(PETSC_SUCCESS);
3382: }
3384: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3385: {
3386: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3388: PetscFunctionBegin;
3389: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3390: if (mumps->id.job == JOB_NULL) {
3391: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3392: *val = 0.0;
3393: for (i = 0; i < nCNTL_pre; ++i) {
3394: if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3395: }
3396: } else *val = ID_CNTL_GET(mumps->id, icntl);
3397: PetscFunctionReturn(PETSC_SUCCESS);
3398: }
3400: /*@C
3401: MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>
3403: Logically Collective
3405: Input Parameters:
3406: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
3407: - tmpdir - temporary directory for out-of-core facility.
3409: Level: beginner
3411: Note:
3412: To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`.
3413: If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. If neither is defined, it will use /tmp by default.
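   Example Usage:
   A minimal sketch of combining this routine with out-of-core factorization; the directory path and factor type below are illustrative only:
.vb
   Mat F;
   PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
   PetscCall(MatMumpsSetIcntl(F, 22, 1));                // enable the MUMPS out-of-core facility
   PetscCall(MatMumpsSetOocTmpDir(F, "/scratch/mumps")); // directory where MUMPS writes its OOC files
   // ... then perform the symbolic and numeric factorizations (or PCSetUp()) as usual
.ve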
3415: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3416: @*/
3417: PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir)
3418: {
3419: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3421: PetscFunctionBegin;
3424: PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir)));
3425: PetscFunctionReturn(PETSC_SUCCESS);
3426: }
3428: /*@C
3429: MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>
3431: Logically Collective
3433: Input Parameter:
3434: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
3436: Output Parameter:
3437: . tmpdir - temporary directory for out-of-core facility.
3439: Level: beginner
3441: Note:
3442: The returned string is read-only; the user should not modify or free it.
3444: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3445: @*/
3446: PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir)
3447: {
3448: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3450: PetscFunctionBegin;
3453: if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir;
3454: PetscFunctionReturn(PETSC_SUCCESS);
3455: }
3457: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3458: {
3459: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3461: PetscFunctionBegin;
3462: *info = mumps->id.INFO(icntl);
3463: PetscFunctionReturn(PETSC_SUCCESS);
3464: }
3466: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3467: {
3468: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3470: PetscFunctionBegin;
3471: *infog = mumps->id.INFOG(icntl);
3472: PetscFunctionReturn(PETSC_SUCCESS);
3473: }
3475: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3476: {
3477: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3479: PetscFunctionBegin;
3480: *rinfo = ID_RINFO_GET(mumps->id, icntl);
3481: PetscFunctionReturn(PETSC_SUCCESS);
3482: }
3484: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3485: {
3486: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3488: PetscFunctionBegin;
3489: *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3490: PetscFunctionReturn(PETSC_SUCCESS);
3491: }
3493: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3494: {
3495: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3497: PetscFunctionBegin;
3498: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3499: *size = 0;
3500: *array = NULL;
3501: if (!mumps->myid) {
3502: *size = mumps->id.INFOG(28);
3503: PetscCall(PetscMalloc1(*size, array));
3504: for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3505: }
3506: PetscFunctionReturn(PETSC_SUCCESS);
3507: }
3509: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3510: {
3511: Mat Bt = NULL, Btseq = NULL;
3512: PetscBool flg;
3513: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3514: PetscScalar *aa;
3515: PetscInt spnr, *ia, *ja, M, nrhs;
3517: PetscFunctionBegin;
3518: PetscAssertPointer(spRHS, 2);
3519: PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3520: PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3521: PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3522: PetscCall(MatTransposeGetMat(spRHS, &Bt));
3524: PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30): compute a user-specified set of entries in inv(A) */
3526: if (mumps->petsc_size > 1) {
3527: Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3528: Btseq = b->A;
3529: } else {
3530: Btseq = Bt;
3531: }
3533: PetscCall(MatGetSize(spRHS, &M, &nrhs));
3534: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3535: PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3536: mumps->id.rhs = NULL;
3538: if (!mumps->myid) {
3539: PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3540: PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3541: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3542: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3543: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3544: } else {
3545: mumps->id.irhs_ptr = NULL;
3546: mumps->id.irhs_sparse = NULL;
3547: mumps->id.nz_rhs = 0;
3548: if (mumps->id.rhs_sparse_len) {
3549: PetscCall(PetscFree(mumps->id.rhs_sparse));
3550: mumps->id.rhs_sparse_len = 0;
3551: }
3552: }
3553: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3554: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3556: /* solve phase */
3557: mumps->id.job = JOB_SOLVE;
3558: PetscMUMPS_c(mumps);
3559: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3561: if (!mumps->myid) {
3562: PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3563: PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3564: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3565: }
3566: PetscFunctionReturn(PETSC_SUCCESS);
3567: }
3569: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3570: {
3571: Mat spRHS;
3573: PetscFunctionBegin;
3574: PetscCall(MatCreateTranspose(spRHST, &spRHS));
3575: PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3576: PetscCall(MatDestroy(&spRHS));
3577: PetscFunctionReturn(PETSC_SUCCESS);
3578: }
3580: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3581: {
3582: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3584: PetscFunctionBegin;
3585: if (nblk) {
3586: PetscAssertPointer(blkptr, 4);
3587: PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3588: PetscCall(PetscFree(mumps->id.blkptr));
3589: PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3590: for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3591: // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3592: // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3593: // mumps->id.ICNTL(15) = 1;
3594: if (blkvar) {
3595: PetscCall(PetscFree(mumps->id.blkvar));
3596: PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3597: for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3598: }
3599: } else {
3600: PetscCall(PetscFree(mumps->id.blkptr));
3601: PetscCall(PetscFree(mumps->id.blkvar));
3602: // mumps->id.ICNTL(15) = 0;
3603: mumps->id.nblk = 0;
3604: }
3605: PetscFunctionReturn(PETSC_SUCCESS);
3606: }
3608: /*MC
3609: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
3610: MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3612: Works with `MATAIJ`, `MATSBAIJ`, `MATBAIJ`, `MATSELL`, `MATDENSE`, `MATDIAGONAL`, and `MATNEST` matrices (`MATBAIJ` and `MATSELL` only for LU)
3614: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3616: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
3617: See details below.
3619: Use `-pc_type lu` or `-pc_type cholesky` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
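   For example, with a hypothetical executable ./ex the command line might look like
.vb
     mpiexec -n 4 ./ex -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2
.ve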
3621: Options Database Keys:
3622: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3623: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
3624: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
3625: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
3626: . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
3627: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3628: Use -pc_factor_mat_ordering_type type to have PETSc perform the ordering (sequential only)
3629: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
3630: . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3631: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3632: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3633: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3634: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3635: . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3636: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3637: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3638: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3639: . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
3640: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3641: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3642: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
3643: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3644: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3645: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3646: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3647: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3648: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3649: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3650: . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3651: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3652: . -mat_mumps_icntl_40 - ICNTL(40): adaptive BLR precision feature
3653: . -mat_mumps_icntl_47 - ICNTL(47): single precision factorization in a double precision instance
3654: . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3655: . -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3656: . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3657: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
3658: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
3659: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3660: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3661: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3662: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3663: - -mat_mumps_use_omp_threads m - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) had been called before calling MUMPS.
3664: The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
3666: Level: beginner
3668: Notes:
3669: MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it will
3670: produce an error if the matrix is Hermitian.
3672: When used within a `KSP`/`PC` solve, the options are prefixed with the options prefix of the `PC`. Otherwise one can set the options prefix by calling
3673: `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
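   The MUMPS controls can also be set programmatically on the factor matrix. A minimal sketch, assuming a `KSP` ksp whose operators have already been set
   (`PCFactorSetUpMatSolverType()` creates the factor matrix so that it can be configured before the factorization):
.vb
   KSPGetPC(ksp, &pc);
   PCSetType(pc, PCLU);
   PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
   PCFactorSetUpMatSolverType(pc);
   PCFactorGetMatrix(pc, &mat);
   MatMumpsSetIcntl(mat, 14, 30);   // ICNTL(14): 30 percent increase in the estimated working space
   MatMumpsSetCntl(mat, 1, 0.01);   // CNTL(1): relative pivoting threshold
   KSPSolve(ksp, b, x);
.ve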
3675: When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3676: the failure with
3677: .vb
3678: KSPGetPC(ksp,&pc);
3679: PCFactorGetMatrix(pc,&mat);
3680: MatMumpsGetInfo(mat,....);
3681: MatMumpsGetInfog(mat,....); etc.
3682: .ve
3683: Alternatively, run with `-ksp_error_if_not_converged`, and the program will stop and print the information in the error message.
3685: MUMPS provides 64-bit integer support in two build modes:
3686: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3687: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3689: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3690: MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3691: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3692: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3694: With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3696: Two modes to run MUMPS/PETSc with OpenMP
3697: .vb
3698: Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want 16 OpenMP
3699: threads per rank, you may use "export OMP_NUM_THREADS=16 && mpiexec -n 4 ./test".
3700: .ve
3702: .vb
3703: `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3704: if a compute node has 32 cores and you run on two nodes, you may use "mpiexec -n 64 ./test -mat_mumps_use_omp_threads 16"
3705: .ve
3707: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3708: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3709: (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3710: libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3711: (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3713: If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3714: processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3715: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3716: are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which may have been set
3717: by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
3718: On a multi-socket compute node, MPI rank mapping matters. Using the above example again, suppose your compute node has two sockets.
3719: If you interleave MPI ranks across the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and you bind
3720: MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
3721: cores on socket 1, which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
3722: problem does not arise. Therefore, when you use `-mat_mumps_use_omp_threads`, keep an eye on your MPI rank mapping and CPU binding.
3723: For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3724: examine the mapping result.
3726: PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3727: for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3728: calls `omp_set_num_threads`(m) internally before calling MUMPS.
3730: See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3732: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3733: M*/
3735: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3736: {
3737: PetscFunctionBegin;
3738: *type = MATSOLVERMUMPS;
3739: PetscFunctionReturn(PETSC_SUCCESS);
3740: }
3742: /* MatGetFactor for Seq and MPI AIJ matrices */
3743: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3744: {
3745: Mat B;
3746: Mat_MUMPS *mumps;
3747: PetscBool isSeqAIJ, isDiag, isDense;
3748: PetscMPIInt size;
3750: PetscFunctionBegin;
3751: if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3752: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3753: *F = NULL;
3754: PetscFunctionReturn(PETSC_SUCCESS);
3755: }
3756: /* Create the factorization matrix */
3757: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3758: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3759: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3760: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3761: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3762: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3763: PetscCall(MatSetUp(B));
3765: PetscCall(PetscNew(&mumps));
3767: B->ops->view = MatView_MUMPS;
3768: B->ops->getinfo = MatGetInfo_MUMPS;
3770: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3771: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3772: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3773: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3774: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3775: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3776: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3777: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3778: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3779: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3780: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3781: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3782: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3783: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3784: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
3786: if (ftype == MAT_FACTOR_LU) {
3787: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3788: B->factortype = MAT_FACTOR_LU;
3789: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3790: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3791: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3792: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3793: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3794: mumps->sym = 0;
3795: } else {
3796: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3797: B->factortype = MAT_FACTOR_CHOLESKY;
3798: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3799: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3800: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3801: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3802: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3803: if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
3804: else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3805: else mumps->sym = 2;
3806: }
3808: /* set solvertype */
3809: PetscCall(PetscFree(B->solvertype));
3810: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3811: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3812: if (size == 1) {
3813: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3814: B->canuseordering = PETSC_TRUE;
3815: }
3816: B->ops->destroy = MatDestroy_MUMPS;
3817: B->data = (void *)mumps;
3819: *F = B;
3820: mumps->id.job = JOB_NULL;
3821: mumps->ICNTL_pre = NULL;
3822: mumps->CNTL_pre = NULL;
3823: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3824: PetscFunctionReturn(PETSC_SUCCESS);
3825: }
3827: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3828: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3829: {
3830: Mat B;
3831: Mat_MUMPS *mumps;
3832: PetscBool isSeqSBAIJ;
3833: PetscMPIInt size;
3835: PetscFunctionBegin;
3836: if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3837: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3838: *F = NULL;
3839: PetscFunctionReturn(PETSC_SUCCESS);
3840: }
3841: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3842: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3843: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3844: PetscCall(MatSetUp(B));
3846: PetscCall(PetscNew(&mumps));
3847: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3848: if (isSeqSBAIJ) {
3849: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3850: } else {
3851: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3852: }
3854: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3855: B->ops->view = MatView_MUMPS;
3856: B->ops->getinfo = MatGetInfo_MUMPS;
3858: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3859: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3860: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3861: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3862: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3863: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3864: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3865: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3866: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3867: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3868: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3869: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3870: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3871: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3872: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
3874: B->factortype = MAT_FACTOR_CHOLESKY;
3875: if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
3876: else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3877: else mumps->sym = 2;
3879: /* set solvertype */
3880: PetscCall(PetscFree(B->solvertype));
3881: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3882: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3883: if (size == 1) {
3884: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3885: B->canuseordering = PETSC_TRUE;
3886: }
3887: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3888: B->ops->destroy = MatDestroy_MUMPS;
3889: B->data = (void *)mumps;
3891: *F = B;
3892: mumps->id.job = JOB_NULL;
3893: mumps->ICNTL_pre = NULL;
3894: mumps->CNTL_pre = NULL;
3895: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3896: PetscFunctionReturn(PETSC_SUCCESS);
3897: }
3899: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3900: {
3901: Mat B;
3902: Mat_MUMPS *mumps;
3903: PetscBool isSeqBAIJ;
3904: PetscMPIInt size;
3906: PetscFunctionBegin;
3907: /* Create the factorization matrix */
3908: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3909: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3910: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3911: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3912: PetscCall(MatSetUp(B));
3914: PetscCall(PetscNew(&mumps));
3915: PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3916: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3917: B->factortype = MAT_FACTOR_LU;
3918: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3919: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3920: mumps->sym = 0;
3921: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3923: B->ops->view = MatView_MUMPS;
3924: B->ops->getinfo = MatGetInfo_MUMPS;
3926: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3927: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3928: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3929: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3930: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3931: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3932: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3933: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3934: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3935: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3936: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3937: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3938: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3939: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3940: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
3942: /* set solvertype */
3943: PetscCall(PetscFree(B->solvertype));
3944: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3945: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3946: if (size == 1) {
3947: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3948: B->canuseordering = PETSC_TRUE;
3949: }
3950: B->ops->destroy = MatDestroy_MUMPS;
3951: B->data = (void *)mumps;
3953: *F = B;
3954: mumps->id.job = JOB_NULL;
3955: mumps->ICNTL_pre = NULL;
3956: mumps->CNTL_pre = NULL;
3957: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3958: PetscFunctionReturn(PETSC_SUCCESS);
3959: }
3961: /* MatGetFactor for Seq and MPI SELL matrices */
3962: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3963: {
3964: Mat B;
3965: Mat_MUMPS *mumps;
3966: PetscBool isSeqSELL;
3967: PetscMPIInt size;
3969: PetscFunctionBegin;
3970: /* Create the factorization matrix */
3971: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3972: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3973: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3974: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3975: PetscCall(MatSetUp(B));
3977: PetscCall(PetscNew(&mumps));
3979: B->ops->view = MatView_MUMPS;
3980: B->ops->getinfo = MatGetInfo_MUMPS;
3982: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3983: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3984: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3985: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3986: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3987: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3988: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3989: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3990: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3991: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3992: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3993: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3995: PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3996: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3997: B->factortype = MAT_FACTOR_LU;
3998: PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3999: mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4000: mumps->sym = 0;
4001: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4003: /* set solvertype */
4004: PetscCall(PetscFree(B->solvertype));
4005: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4006: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4007: if (size == 1) {
4008: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4009: B->canuseordering = PETSC_TRUE;
4010: }
4011: B->ops->destroy = MatDestroy_MUMPS;
4012: B->data = (void *)mumps;
4014: *F = B;
4015: mumps->id.job = JOB_NULL;
4016: mumps->ICNTL_pre = NULL;
4017: mumps->CNTL_pre = NULL;
4018: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4019: PetscFunctionReturn(PETSC_SUCCESS);
4020: }
4022: /* MatGetFactor for MATNEST matrices */
4023: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4024: {
4025: Mat B, **mats;
4026: Mat_MUMPS *mumps;
4027: PetscInt nr, nc;
4028: PetscMPIInt size;
4029: PetscBool flg = PETSC_TRUE;
4031: PetscFunctionBegin;
4032: if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4033: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4034: *F = NULL;
4035: PetscFunctionReturn(PETSC_SUCCESS);
4036: }
4038: /* Return if some condition is not satisfied */
4039: *F = NULL;
4040: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4041: if (ftype == MAT_FACTOR_CHOLESKY) {
4042: IS *rows, *cols;
4043: PetscInt *m, *M;
4045: PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4046: PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4047: PetscCall(MatNestGetISs(A, rows, cols));
4048: for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4049: if (!flg) {
4050: PetscCall(PetscFree2(rows, cols));
4051: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4052: PetscFunctionReturn(PETSC_SUCCESS);
4053: }
4054: PetscCall(PetscMalloc2(nr, &m, nr, &M));
4055: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
4056: for (PetscInt r = 0; flg && r < nr; r++)
4057: for (PetscInt k = r + 1; flg && k < nr; k++)
4058: if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4059: PetscCall(PetscFree2(m, M));
4060: PetscCall(PetscFree2(rows, cols));
4061: if (!flg) {
4062: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4063: PetscFunctionReturn(PETSC_SUCCESS);
4064: }
4065: }
4067: for (PetscInt r = 0; r < nr; r++) {
4068: for (PetscInt c = 0; c < nc; c++) {
4069: Mat sub = mats[r][c];
4070: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
4072: if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4073: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4074: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4075: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4076: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4077: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4078: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4079: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4080: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4081: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4082: if (ftype == MAT_FACTOR_CHOLESKY) {
4083: if (r == c) {
4084: if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4085: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4086: flg = PETSC_FALSE;
4087: }
4088: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4089: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4090: flg = PETSC_FALSE;
4091: }
4092: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4093: PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4094: flg = PETSC_FALSE;
4095: }
4096: }
4097: }
4098: if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
4100: /* Create the factorization matrix */
4101: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4102: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4103: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4104: PetscCall(MatSetUp(B));
4106: PetscCall(PetscNew(&mumps));
4108: B->ops->view = MatView_MUMPS;
4109: B->ops->getinfo = MatGetInfo_MUMPS;
4111: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4112: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4113: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4114: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4115: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4116: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4117: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4118: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4119: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4120: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4121: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4122: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4123: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4124: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4125: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4127: if (ftype == MAT_FACTOR_LU) {
4128: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4129: B->factortype = MAT_FACTOR_LU;
4130: mumps->sym = 0;
4131: } else {
4132: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4133: B->factortype = MAT_FACTOR_CHOLESKY;
4134: if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
4135: else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4136: else mumps->sym = 2;
4137: }
4138: mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4139: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
4141: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4142: if (size == 1) {
4143: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4144: B->canuseordering = PETSC_TRUE;
4145: }
4147: /* set solvertype */
4148: PetscCall(PetscFree(B->solvertype));
4149: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4150: B->ops->destroy = MatDestroy_MUMPS;
4151: B->data = (void *)mumps;
4153: *F = B;
4154: mumps->id.job = JOB_NULL;
4155: mumps->ICNTL_pre = NULL;
4156: mumps->CNTL_pre = NULL;
4157: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4158: PetscFunctionReturn(PETSC_SUCCESS);
4159: }
4161: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4162: {
4163: PetscFunctionBegin;
4164: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4165: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4166: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4167: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4168: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4169: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4170: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4171: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4172: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4173: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4174: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4175: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4176: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4177: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4178: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4179: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4180: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4181: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4182: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4183: PetscFunctionReturn(PETSC_SUCCESS);
4184: }