Actual source code: mumps.c
1: /*
2: Provides an interface to the MUMPS sparse solver
3: */
4: #include <petscpkg_version.h>
5: #include <petscsf.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
9: #include <petsc/private/vecimpl.h>
11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
13: EXTERN_C_BEGIN
14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15: #include <cmumps_c.h>
16: #include <zmumps_c.h>
17: #include <smumps_c.h>
18: #include <dmumps_c.h>
19: #else
20: #if defined(PETSC_USE_COMPLEX)
21: #if defined(PETSC_USE_REAL_SINGLE)
22: #include <cmumps_c.h>
23: #define MUMPS_c cmumps_c
24: #define MUMPS_STRUC_C CMUMPS_STRUC_C
25: #define MumpsScalar CMUMPS_COMPLEX
26: #else
27: #include <zmumps_c.h>
28: #define MUMPS_c zmumps_c
29: #define MUMPS_STRUC_C ZMUMPS_STRUC_C
30: #define MumpsScalar ZMUMPS_COMPLEX
31: #endif
32: #else
33: #if defined(PETSC_USE_REAL_SINGLE)
34: #include <smumps_c.h>
35: #define MUMPS_c smumps_c
36: #define MUMPS_STRUC_C SMUMPS_STRUC_C
37: #define MumpsScalar SMUMPS_REAL
38: #else
39: #include <dmumps_c.h>
40: #define MUMPS_c dmumps_c
41: #define MUMPS_STRUC_C DMUMPS_STRUC_C
42: #define MumpsScalar DMUMPS_REAL
43: #endif
44: #endif
45: #endif
46: EXTERN_C_END
48: #define JOB_INIT -1
49: #define JOB_NULL 0
50: #define JOB_FACTSYMBOLIC 1
51: #define JOB_FACTNUMERIC 2
52: #define JOB_SOLVE 3
53: #define JOB_END -2
55: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
56: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
57: naming convention in PetscMPIInt, PetscBLASInt etc.
58: */
59: typedef MUMPS_INT PetscMUMPSInt;
61: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
62: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
63: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
64: #endif
65: #else
66: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
67: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
68: #endif
69: #endif
71: #define MPIU_MUMPSINT MPI_INT
72: #define PETSC_MUMPS_INT_MAX 2147483647
73: #define PETSC_MUMPS_INT_MIN -2147483648
75: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
76: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
77: {
78: PetscFunctionBegin;
79: #if PetscDefined(USE_64BIT_INDICES)
80: PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
81: #endif
82: *b = (PetscMUMPSInt)a;
83: PetscFunctionReturn(PETSC_SUCCESS);
84: }
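// A minimal usage sketch (variable names are hypothetical): narrow a PETSc index before handing it to MUMPS.
// With 64-bit PetscInt, a value that does not fit in 32 bits triggers the PetscAssert() above instead of silently wrapping.
//
//   PetscInt      nrow = 100000;   // e.g. a local row count
//   PetscMUMPSInt nrow_mumps;
//   PetscCall(PetscMUMPSIntCast(nrow, &nrow_mumps)); // fits, so the value is simply copied into the narrower type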
86: /* Put these utility routines here since they are only used in this file */
87: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
88: {
89: PetscInt myval;
90: PetscBool myset;
92: PetscFunctionBegin;
93: /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
94: PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
95: if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
96: if (set) *set = myset;
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
99: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
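// A minimal sketch of intended use (the option name and variables are illustrative): the wrapper is meant to be
// called inside a PetscOptionsBegin()/PetscOptionsEnd() block, exactly like PetscOptionsInt(), but it stores the
// result into a PetscMUMPSInt after a checked cast.
//
//   PetscMUMPSInt icntl = 0;
//   PetscBool     flg;
//   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", icntl, &icntl, &flg));
//   PetscOptionsEnd();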
101: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
102: //
103: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
104: // Before/after calling MUMPS, we need to copy fields in/out between the outer and the inner, which may seem expensive. But note that the large fixed-size
105: // arrays within the types are linked directly. In the end, we only need to copy ~20 integers/pointers, which is cheap. See PreMumpsCall()/PostMumpsCall().
106: //
107: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
108: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
109: // Also note that we added some *_len fields, not present in the specific types, to track the sizes of those MumpsScalar buffers.
110: typedef struct {
111: PetscPrecision precision; // precision used by MUMPS
112: void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
114: // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
115: MUMPS_INT sym, par, job;
116: MUMPS_INT comm_fortran; /* Fortran communicator */
117: MUMPS_INT *icntl;
118: void *cntl; // MumpsReal, fixed size array
119: MUMPS_INT n;
120: MUMPS_INT nblk;
122: /* Assembled entry */
123: MUMPS_INT8 nnz;
124: MUMPS_INT *irn;
125: MUMPS_INT *jcn;
126: void *a; // MumpsScalar, centralized input
127: PetscCount a_len;
129: /* Distributed entry */
130: MUMPS_INT8 nnz_loc;
131: MUMPS_INT *irn_loc;
132: MUMPS_INT *jcn_loc;
133: void *a_loc; // MumpsScalar, distributed input
134: PetscCount a_loc_len;
136: /* Matrix by blocks */
137: MUMPS_INT *blkptr;
138: MUMPS_INT *blkvar;
140: /* Ordering, if given by user */
141: MUMPS_INT *perm_in;
143: /* RHS, solution, output data and statistics */
144: void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers
145: PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
147: MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
148: MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
149: // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
150: MUMPS_INT schur_lld;
151: MUMPS_INT *info, *infog; // fixed size array
152: void *rinfo, *rinfog; // MumpsReal, fixed size array
154: /* Null space */
155: MUMPS_INT *pivnul_list; // allocated by MUMPS!
156: MUMPS_INT *mapping; // allocated by MUMPS!
158: /* Schur */
159: MUMPS_INT size_schur;
160: MUMPS_INT *listvar_schur;
161: void *schur; // MumpsScalar
162: PetscCount schur_len;
164: /* For out-of-core */
165: char *ooc_tmpdir; // fixed size array
166: char *ooc_prefix; // fixed size array
167: } XMUMPS_STRUC_C;
169: // Note: the fixed-size arrays live inside the inner MUMPS struct just allocated; point the outer struct's fields at them
170: #define AllocatInternalID(MUMPS_STRUC_T, outer) \
171: do { \
172: MUMPS_STRUC_T *inner; \
173: PetscCall(PetscNew(&inner)); \
174: outer->icntl = inner->icntl; \
175: outer->cntl = inner->cntl; \
176: outer->info = inner->info; \
177: outer->infog = inner->infog; \
178: outer->rinfo = inner->rinfo; \
179: outer->rinfog = inner->rinfog; \
180: outer->ooc_tmpdir = inner->ooc_tmpdir; \
181: outer->ooc_prefix = inner->ooc_prefix; \
182: /* these three fields should never change after init */ \
183: inner->comm_fortran = outer->comm_fortran; \
184: inner->par = outer->par; \
185: inner->sym = outer->sym; \
186: outer->internal_id = inner; \
187: } while (0)
189: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
190: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
191: {
192: PetscFunctionBegin;
193: outer->precision = precision;
194: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
195: #if defined(PETSC_USE_COMPLEX)
196: if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
197: else AllocatInternalID(ZMUMPS_STRUC_C, outer);
198: #else
199: if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
200: else AllocatInternalID(DMUMPS_STRUC_C, outer);
201: #endif
202: #else
203: AllocatInternalID(MUMPS_STRUC_C, outer);
204: #endif
205: PetscFunctionReturn(PETSC_SUCCESS);
206: }
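// A sketch of the intended initialization order (local names are hypothetical, and the conversion of the MPI
// communicator is simplified): comm_fortran, par and sym must be set on the outer struct before this call,
// because AllocatInternalID() copies them into the freshly PetscNew()'d inner struct exactly once.
//
//   XMUMPS_STRUC_C id = {0};
//   id.comm_fortran = (MUMPS_INT)MPI_Comm_c2f(comm); // Fortran handle of an existing MPI communicator
//   id.par          = 1;                             // host participates in the factorization
//   id.sym          = 0;                             // unsymmetric matrix
//   PetscCall(MatMumpsAllocateInternalID(&id, PETSC_PRECISION_DOUBLE));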
208: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
209: do { \
210: MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
211: PetscCall(PetscFree(inner->a)); \
212: PetscCall(PetscFree(inner->a_loc)); \
213: PetscCall(PetscFree(inner->redrhs)); \
214: PetscCall(PetscFree(inner->rhs)); \
215: PetscCall(PetscFree(inner->rhs_sparse)); \
216: PetscCall(PetscFree(inner->rhs_loc)); \
217: PetscCall(PetscFree(inner->sol_loc)); \
218: PetscCall(PetscFree(inner->schur)); \
219: } while (0)
221: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
222: {
223: PetscFunctionBegin;
224: if (outer->internal_id) { // sometimes the inner id was never created before we destroy the outer
225: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
226: const PetscPrecision mumps_precision = outer->precision;
227: if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
228: #if defined(PETSC_USE_COMPLEX)
229: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
230: else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
231: #else
232: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
233: else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
234: #endif
235: }
236: #endif
237: PetscCall(PetscFree(outer->internal_id));
238: }
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: // Make a companion MumpsScalar array (with a given PetscScalar array <pa>), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
243: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
244: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they are <*ma> and <*m>)
245: // If <ma> already points to a buffer/array, <m> on input should be its length. Note the buffer might be freed if it is not big enough for this request.
246: //
247: // The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same type.
248: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
249: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
250: //
251: //
252: // Input parameters:
253: // + convert - whether to do PetscScalar to MumpsScalar conversion
254: // . n - length of the PetscScalar array
255: // . pa - [n], points to the PetscScalar array
256: // . precision - precision of MumpsScalar
257: // . m - on input, length of an existing MumpsScalar array if any, otherwise *m is just zero.
258: // - ma - on input, an existing MumpsScalar array if any.
259: //
260: // Output parameters:
261: // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
262: // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
263: //
264: // Note:
265: // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
266: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
267: {
268: PetscFunctionBegin;
269: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
270: const PetscPrecision mumps_precision = precision;
271: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
272: #if defined(PETSC_USE_COMPLEX)
273: if (mumps_precision != PETSC_SCALAR_PRECISION) {
274: if (mumps_precision == PETSC_PRECISION_SINGLE) {
275: if (*m < n) {
276: PetscCall(PetscFree(*ma));
277: PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
278: *m = n;
279: }
280: if (convert) {
281: CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
282: for (PetscCount i = 0; i < n; i++) {
283: b[i].r = PetscRealPart(pa[i]);
284: b[i].i = PetscImaginaryPart(pa[i]);
285: }
286: }
287: } else {
288: if (*m < n) {
289: PetscCall(PetscFree(*ma));
290: PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
291: *m = n;
292: }
293: if (convert) {
294: ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
295: for (PetscCount i = 0; i < n; i++) {
296: b[i].r = PetscRealPart(pa[i]);
297: b[i].i = PetscImaginaryPart(pa[i]);
298: }
299: }
300: }
301: }
302: #else
303: if (mumps_precision != PETSC_SCALAR_PRECISION) {
304: if (mumps_precision == PETSC_PRECISION_SINGLE) {
305: if (*m < n) {
306: PetscCall(PetscFree(*ma));
307: PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
308: *m = n;
309: }
310: if (convert) {
311: SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
312: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
313: }
314: } else {
315: if (*m < n) {
316: PetscCall(PetscFree(*ma));
317: PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
318: *m = n;
319: }
320: if (convert) {
321: DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
322: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
323: }
324: }
325: }
326: #endif
327: else
328: #endif
329: {
330: if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
331: *ma = (void *)pa; // same precision, make them alias
332: *m = 0;
333: }
334: PetscFunctionReturn(PETSC_SUCCESS);
335: }
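// A minimal usage sketch (hypothetical names): mirror a PetscScalar right-hand side into the precision the MUMPS
// instance runs in. When the precisions match, buf simply aliases rhs and len stays 0, which is how the caller
// later knows whether buf must be freed separately.
//
//   PetscCount len = 0;    // no MumpsScalar buffer exists yet
//   void      *buf = NULL;
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, rhs, outer->precision, &len, &buf));
//   // ... hand buf to MUMPS ...
//   if (len) PetscCall(PetscFree(buf)); // only free when a separate (converted) buffer was allocated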
337: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
338: //
339: // 1) If the two types are different, cast array elements.
340: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
341: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
342: {
343: PetscFunctionBegin;
344: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
345: if (mumps_precision != PETSC_SCALAR_PRECISION) {
346: #if defined(PETSC_USE_COMPLEX)
347: if (mumps_precision == PETSC_PRECISION_SINGLE) {
348: PetscReal *a = (PetscReal *)pa;
349: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
350: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
351: } else {
352: PetscReal *a = (PetscReal *)pa;
353: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
354: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
355: }
356: #else
357: if (mumps_precision == PETSC_PRECISION_SINGLE) {
358: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
360: } else {
361: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
362: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
363: }
364: #endif
365: } else
366: #endif
367: PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
368: PetscFunctionReturn(PETSC_SUCCESS);
369: }
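// A minimal sketch (hypothetical names): copy a MUMPS result back into PETSc's precision, e.g. after a solve.
// If MUMPS ran in the same precision as PetscScalar this degenerates to a PetscArraycpy(), and it is a no-op
// when both pointers are identical (the aliasing case set up by MatMumpsMakeMumpsScalarArray()).
//
//   PetscCall(MatMumpsCastMumpsScalarArray(n, outer->precision, buf, sol)); // buf: MumpsScalar[n], sol: PetscScalar[n]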
371: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
372: //
373: // 1) If the two types are different, cast array elements.
374: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
375: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
376: {
377: PetscFunctionBegin;
378: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
379: if (mumps_precision != PETSC_SCALAR_PRECISION) {
380: #if defined(PETSC_USE_COMPLEX)
381: if (mumps_precision == PETSC_PRECISION_SINGLE) {
382: CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
383: for (PetscCount i = 0; i < n; i++) {
384: b[i].r = PetscRealPart(pa[i]);
385: b[i].i = PetscImaginaryPart(pa[i]);
386: }
387: } else {
388: ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
389: for (PetscCount i = 0; i < n; i++) {
390: b[i].r = PetscRealPart(pa[i]);
391: b[i].i = PetscImaginaryPart(pa[i]);
392: }
393: }
394: #else
395: if (mumps_precision == PETSC_PRECISION_SINGLE) {
396: SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
397: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
398: } else {
399: DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
400: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
401: }
402: #endif
403: } else
404: #endif
405: PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }
409: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
410: {
411: return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
412: }
414: #define PreMumpsCall(inner, outer, mumpsscalar) \
415: do { \
416: inner->job = outer->job; \
417: inner->n = outer->n; \
418: inner->nblk = outer->nblk; \
419: inner->nnz = outer->nnz; \
420: inner->irn = outer->irn; \
421: inner->jcn = outer->jcn; \
422: inner->a = (mumpsscalar *)outer->a; \
423: inner->nnz_loc = outer->nnz_loc; \
424: inner->irn_loc = outer->irn_loc; \
425: inner->jcn_loc = outer->jcn_loc; \
426: inner->a_loc = (mumpsscalar *)outer->a_loc; \
427: inner->blkptr = outer->blkptr; \
428: inner->blkvar = outer->blkvar; \
429: inner->perm_in = outer->perm_in; \
430: inner->rhs = (mumpsscalar *)outer->rhs; \
431: inner->redrhs = (mumpsscalar *)outer->redrhs; \
432: inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \
433: inner->sol_loc = (mumpsscalar *)outer->sol_loc; \
434: inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \
435: inner->irhs_sparse = outer->irhs_sparse; \
436: inner->irhs_ptr = outer->irhs_ptr; \
437: inner->isol_loc = outer->isol_loc; \
438: inner->irhs_loc = outer->irhs_loc; \
439: inner->nrhs = outer->nrhs; \
440: inner->lrhs = outer->lrhs; \
441: inner->lredrhs = outer->lredrhs; \
442: inner->nz_rhs = outer->nz_rhs; \
443: inner->lsol_loc = outer->lsol_loc; \
444: inner->nloc_rhs = outer->nloc_rhs; \
445: inner->lrhs_loc = outer->lrhs_loc; \
446: inner->schur_lld = outer->schur_lld; \
447: inner->size_schur = outer->size_schur; \
448: inner->listvar_schur = outer->listvar_schur; \
449: inner->schur = (mumpsscalar *)outer->schur; \
450: } while (0)
452: #define PostMumpsCall(inner, outer) \
453: do { \
454: outer->pivnul_list = inner->pivnul_list; \
455: outer->mapping = inner->mapping; \
456: } while (0)
458: // Entry point for PETSc to call MUMPS
459: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
460: {
461: PetscFunctionBegin;
462: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
463: #if defined(PETSC_USE_COMPLEX)
464: if (outer->precision == PETSC_PRECISION_SINGLE) {
465: CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
466: PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
467: PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
468: PostMumpsCall(inner, outer);
469: } else {
470: ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
471: PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
472: PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
473: PostMumpsCall(inner, outer);
474: }
475: #else
476: if (outer->precision == PETSC_PRECISION_SINGLE) {
477: SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
478: PreMumpsCall(inner, outer, SMUMPS_REAL);
479: PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
480: PostMumpsCall(inner, outer);
481: } else {
482: DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
483: PreMumpsCall(inner, outer, DMUMPS_REAL);
484: PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
485: PostMumpsCall(inner, outer);
486: }
487: #endif
488: #else
489: MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
490: PreMumpsCall(inner, outer, MumpsScalar);
491: PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
492: PostMumpsCall(inner, outer);
493: #endif
494: PetscFunctionReturn(PETSC_SUCCESS);
495: }
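// An illustrative calling sequence (a sketch only; the actual driver code uses the PetscMUMPS_c() wrapper defined
// below): the outer struct carries the job code and data pointers, and PetscCallMumps_Private() syncs them into
// the precision-specific inner struct before dispatching to the matching [sdcz]mumps_c() entry point.
//
//   outer->job = JOB_FACTSYMBOLIC;            // analysis
//   PetscCall(PetscCallMumps_Private(outer));
//   outer->job = JOB_FACTNUMERIC;             // numerical factorization
//   PetscCall(PetscCallMumps_Private(outer));
//   outer->job = JOB_SOLVE;                   // forward/backward substitution
//   PetscCall(PetscCallMumps_Private(outer));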
497: /* macros s.t. indices match MUMPS documentation */
498: #define ICNTL(I) icntl[(I) - 1]
499: #define INFOG(I) infog[(I) - 1]
500: #define INFO(I) info[(I) - 1]
502: // Get a value from a MumpsScalar array, which is the <F> field in the struct <ID> of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
503: #if defined(PETSC_USE_COMPLEX)
504: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
505: #else
506: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
507: #endif
509: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
510: #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
511: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
512: #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])
514: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal
515: #define ID_CNTL_SET(ID, I, VAL) \
516: do { \
517: if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
518: else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
519: } while (0)
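// A minimal sketch of how these accessors are meant to be used (the values and the variable id are illustrative):
// since cntl/rinfo/rinfog are stored in MUMPS's own precision, every access goes through a precision-dispatching
// macro rather than a direct array indexing.
//
//   ID_CNTL_SET(id, 1, 0.01);                  // set CNTL(1), the relative threshold for numerical pivoting
//   PetscReal rinfog1 = ID_RINFOG_GET(id, 1);  // read the global real statistic RINFOG(1)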
521: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
522: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
523: #define PetscMUMPS_c(mumps) \
524: do { \
525: if (mumps->use_petsc_omp_support) { \
526: if (mumps->is_omp_master) { \
527: PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
528: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
529: PetscCall(PetscCallMumps_Private(&mumps->id)); \
530: PetscCall(PetscFPTrapPop()); \
531: PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
532: } \
533: PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
534: /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
535: to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
536: an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
537: omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
538: */ \
539: SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
540: PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
541: PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
542: PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
543: PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
544: } else { \
545: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
546: PetscCall(PetscCallMumps_Private(&mumps->id)); \
547: PetscCall(PetscFPTrapPop()); \
548: } \
549: } while (0)
550: #else
551: #define PetscMUMPS_c(mumps) \
552: do { \
553: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
554: PetscCall(PetscCallMumps_Private(&mumps->id)); \
555: PetscCall(PetscFPTrapPop()); \
556: } while (0)
557: #endif
559: typedef struct Mat_MUMPS Mat_MUMPS;
560: struct Mat_MUMPS {
561: XMUMPS_STRUC_C id;
563: MatStructure matstruc;
564: PetscMPIInt myid, petsc_size;
565: PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */
566: PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
567: PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
568: PetscMUMPSInt sym;
569: MPI_Comm mumps_comm;
570: PetscMUMPSInt *ICNTL_pre;
571: PetscReal *CNTL_pre;
572: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
573: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
574: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
575: PetscMUMPSInt ICNTL26;
576: PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
577: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
578: PetscInt *rhs_nrow, max_nrhs;
579: PetscMPIInt *rhs_recvcounts, *rhs_disps;
580: PetscScalar *rhs_loc, *rhs_recvbuf;
581: #endif
582: Vec b_seq, x_seq;
583: PetscInt ninfo, *info; /* which INFO to display */
584: PetscInt sizeredrhs;
585: PetscScalar *schur_sol;
586: PetscInt schur_sizesol;
587: PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision
588: PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
589: PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
590: PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
592: /* Support for MATNEST */
593: PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
594: PetscCount *nest_vals_start;
595: PetscScalar *nest_vals;
597: /* stuff used by the PETSc/MUMPS OpenMP support */
598: PetscBool use_petsc_omp_support;
599: PetscOmpCtrl omp_ctrl; /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
600: MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
601: PetscCount *recvcount; /* a collection of nnz on omp_master */
602: PetscMPIInt tag, omp_comm_size;
603: PetscBool is_omp_master; /* is this rank the master of omp_comm */
604: MPI_Request *reqs;
605: };
607: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
608: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
609: */
610: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
611: {
612: PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
614: PetscFunctionBegin;
615: #if defined(PETSC_USE_64BIT_INDICES)
616: {
617: PetscInt i;
618: if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
619: PetscCall(PetscFree(mumps->ia_alloc));
620: PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
621: mumps->cur_ilen = nrow + 1;
622: }
623: if (nnz > mumps->cur_jlen) {
624: PetscCall(PetscFree(mumps->ja_alloc));
625: PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
626: mumps->cur_jlen = nnz;
627: }
628: for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
629: for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
630: *ia_mumps = mumps->ia_alloc;
631: *ja_mumps = mumps->ja_alloc;
632: }
633: #else
634: *ia_mumps = ia;
635: *ja_mumps = ja;
636: #endif
637: PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
638: PetscFunctionReturn(PETSC_SUCCESS);
639: }
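// A worked example (hypothetical data): a 2x3 matrix with nonzeros at (1,1), (1,3) and (2,2) in 1-based CSR form
// is nrow = 2, ia = {1, 3, 4}, ja = {1, 3, 2}, so nnz = ia[nrow] - 1 = 3. With 64-bit PetscInt the arrays are
// copied element-wise into mumps->ia_alloc/ja_alloc as PetscMUMPSInt; otherwise the input pointers are returned
// unchanged.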
641: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
642: {
643: PetscFunctionBegin;
644: PetscCall(PetscFree(mumps->id.listvar_schur));
645: PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
646: PetscCall(PetscFree(mumps->schur_sol));
647: mumps->id.size_schur = 0;
648: mumps->id.schur_lld = 0;
649: if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes the inner id is not yet built
650: PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: /* solve with rhs in mumps->id.redrhs and return in the same location */
654: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
655: {
656: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
657: Mat S, B, X; // solve S*X = B; all three matrices are dense
658: MatFactorSchurStatus schurstatus;
659: PetscInt sizesol;
660: const PetscScalar *xarray;
662: PetscFunctionBegin;
663: PetscCall(MatFactorFactorizeSchurComplement(F));
664: PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
665: PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
667: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
668: PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
669: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
670: PetscCall(MatBindToCPU(B, S->boundtocpu));
671: #endif
672: switch (schurstatus) {
673: case MAT_FACTOR_SCHUR_FACTORED:
674: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
675: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
676: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
677: PetscCall(MatBindToCPU(X, S->boundtocpu));
678: #endif
679: if (!mumps->id.ICNTL(9)) { /* transpose solve */
680: PetscCall(MatMatSolveTranspose(S, B, X));
681: } else {
682: PetscCall(MatMatSolve(S, B, X));
683: }
684: break;
685: case MAT_FACTOR_SCHUR_INVERTED:
686: sizesol = mumps->id.nrhs * mumps->id.size_schur;
687: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
688: PetscCall(PetscFree(mumps->schur_sol));
689: PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
690: mumps->schur_sizesol = sizesol;
691: }
692: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
693: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
694: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
695: PetscCall(MatBindToCPU(X, S->boundtocpu));
696: #endif
697: PetscCall(MatProductCreateWithMat(S, B, NULL, X));
698: if (!mumps->id.ICNTL(9)) { /* transpose solve */
699: PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
700: } else {
701: PetscCall(MatProductSetType(X, MATPRODUCT_AB));
702: }
703: PetscCall(MatProductSetFromOptions(X));
704: PetscCall(MatProductSymbolic(X));
705: PetscCall(MatProductNumeric(X));
707: PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
708: break;
709: default:
710: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
711: }
712: // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
713: PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
714: PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
715: PetscCall(MatDenseRestoreArrayRead(X, &xarray));
716: PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
717: PetscCall(MatDestroy(&B));
718: PetscCall(MatDestroy(&X));
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
723: {
724: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
726: PetscFunctionBegin;
727: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
728: PetscFunctionReturn(PETSC_SUCCESS);
729: }
730: if (!expansion) { /* prepare for the condensation step */
731: PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
732: /* allocate MUMPS internal array to store reduced right-hand sides */
733: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
734: mumps->id.lredrhs = mumps->id.size_schur;
735: mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
736: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
737: PetscCall(PetscFree(mumps->redrhs));
738: PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
739: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
740: }
741: } else { /* prepare for the expansion step */
742: PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
743: mumps->id.ICNTL(26) = 2; /* expansion phase */
744: PetscMUMPS_c(mumps);
745: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
746: /* restore defaults */
747: mumps->id.ICNTL(26) = -1;
748: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
749: if (mumps->id.nrhs > 1) {
750: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
751: PetscCall(PetscFree(mumps->redrhs));
752: mumps->id.redrhs_len = 0;
753: mumps->id.lredrhs = 0;
754: mumps->sizeredrhs = 0;
755: }
756: }
757: PetscFunctionReturn(PETSC_SUCCESS);
758: }
760: /*
761: MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
763: input:
764: A - matrix in aij, baij, or sbaij format
765: shift - 0: C style output triple; 1: Fortran style output triple.
766: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
767: MAT_REUSE_MATRIX: only the values in the v array are updated
768: output:
769: nnz - dim of r, c, and v (number of local nonzero entries of A)
770: r, c, v - row and column indices and matrix values (the matrix triples)
772: The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
773: freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
774: that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
776: */
778: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
779: {
780: const PetscScalar *av;
781: const PetscInt *ai, *aj, *ajj, M = A->rmap->n;
782: PetscCount nz, rnz, k;
783: PetscMUMPSInt *row, *col;
784: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
786: PetscFunctionBegin;
787: PetscCall(MatSeqAIJGetArrayRead(A, &av));
788: if (reuse == MAT_INITIAL_MATRIX) {
789: nz = aa->nz;
790: ai = aa->i;
791: aj = aa->j;
792: PetscCall(PetscMalloc2(nz, &row, nz, &col));
793: for (PetscCount i = k = 0; i < M; i++) {
794: rnz = ai[i + 1] - ai[i];
795: ajj = aj + ai[i];
796: for (PetscCount j = 0; j < rnz; j++) {
797: PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
798: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
799: k++;
800: }
801: }
802: mumps->val = (PetscScalar *)av;
803: mumps->irn = row;
804: mumps->jcn = col;
805: mumps->nnz = nz;
806: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
807: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
808: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
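// A worked example (hypothetical 2x2 sequential AIJ matrix, shift = 1 for MUMPS's Fortran-style indexing):
//
//   A = [10  0]    row-oriented AIJ storage: ai = {0, 1, 3}, aj = {0, 0, 1}, a = {10, 20, 30}
//       [20 30]
//
// produces the centralized triples irn = {1, 2, 2}, jcn = {1, 1, 2}, nnz = 3, with mumps->val pointing directly
// at the AIJ value array {10, 20, 30} (no copy) in the non-nest case.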
812: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
813: {
814: PetscCount nz, i, j, k, r;
815: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
816: PetscMUMPSInt *row, *col;
818: PetscFunctionBegin;
819: nz = a->sliidx[a->totalslices];
820: if (reuse == MAT_INITIAL_MATRIX) {
821: PetscCall(PetscMalloc2(nz, &row, nz, &col));
822: for (i = k = 0; i < a->totalslices; i++) {
823: for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
824: }
825: for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
826: mumps->irn = row;
827: mumps->jcn = col;
828: mumps->nnz = nz;
829: mumps->val = a->val;
830: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
831: else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
832: PetscFunctionReturn(PETSC_SUCCESS);
833: }
835: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
836: {
837: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
838: const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
839: PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
840: PetscInt bs;
841: PetscMUMPSInt *row, *col;
843: PetscFunctionBegin;
844: if (reuse == MAT_INITIAL_MATRIX) {
845: PetscCall(MatGetBlockSize(A, &bs));
846: M = A->rmap->N / bs;
847: ai = aa->i;
848: aj = aa->j;
849: PetscCall(PetscMalloc2(nz, &row, nz, &col));
850: for (i = 0; i < M; i++) {
851: ajj = aj + ai[i];
852: rnz = ai[i + 1] - ai[i];
853: for (k = 0; k < rnz; k++) {
854: for (j = 0; j < bs; j++) {
855: for (m = 0; m < bs; m++) {
856: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
857: PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
858: idx++;
859: }
860: }
861: }
862: }
863: mumps->irn = row;
864: mumps->jcn = col;
865: mumps->nnz = nz;
866: mumps->val = aa->a;
867: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
868: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
869: PetscFunctionReturn(PETSC_SUCCESS);
870: }
872: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
873: {
874: const PetscInt *ai, *aj, *ajj;
875: PetscInt bs;
876: PetscCount nz, rnz, i, j, k, m;
877: PetscMUMPSInt *row, *col;
878: PetscScalar *val;
879: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
880: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
881: #if defined(PETSC_USE_COMPLEX)
882: PetscBool isset, hermitian;
883: #endif
885: PetscFunctionBegin;
886: #if defined(PETSC_USE_COMPLEX)
887: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
888: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
889: #endif
890: ai = aa->i;
891: aj = aa->j;
892: PetscCall(MatGetBlockSize(A, &bs));
893: if (reuse == MAT_INITIAL_MATRIX) {
894: const PetscCount alloc_size = aa->nz * bs2;
896: PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
897: if (bs > 1) {
898: PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
899: mumps->val = mumps->val_alloc;
900: } else {
901: mumps->val = aa->a;
902: }
903: mumps->irn = row;
904: mumps->jcn = col;
905: } else {
906: row = mumps->irn;
907: col = mumps->jcn;
908: }
909: val = mumps->val;
911: nz = 0;
912: if (bs > 1) {
913: for (i = 0; i < mbs; i++) {
914: rnz = ai[i + 1] - ai[i];
915: ajj = aj + ai[i];
916: for (j = 0; j < rnz; j++) {
917: for (k = 0; k < bs; k++) {
918: for (m = 0; m < bs; m++) {
919: if (ajj[j] > i || k >= m) {
920: if (reuse == MAT_INITIAL_MATRIX) {
921: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
922: PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
923: }
924: val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
925: }
926: }
927: }
928: }
929: }
930: } else if (reuse == MAT_INITIAL_MATRIX) {
931: for (i = 0; i < mbs; i++) {
932: rnz = ai[i + 1] - ai[i];
933: ajj = aj + ai[i];
934: for (j = 0; j < rnz; j++) {
935: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
936: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
937: nz++;
938: }
939: }
940: PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
941: } else if (mumps->nest_vals)
942: PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
943: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
944: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
945: PetscFunctionReturn(PETSC_SUCCESS);
946: }
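// A worked illustration of the bs > 1 filter above (hypothetical block size bs = 2): a point entry sits at
// row i*bs + m and column ajj[j]*bs + k, so for a diagonal block (ajj[j] == i) the test `ajj[j] > i || k >= m`
// keeps only (m,k) = (0,0), (0,1), (1,1) and drops (1,0), while for a strictly upper block all bs*bs entries are
// kept. This turns the block-upper SBAIJ storage into a point-wise upper-triangular triple list.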
948: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
949: {
950: const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n;
951: PetscCount nz, rnz, i, j;
952: const PetscScalar *av, *v1;
953: PetscScalar *val;
954: PetscMUMPSInt *row, *col;
955: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
956: PetscBool missing;
957: #if defined(PETSC_USE_COMPLEX)
958: PetscBool hermitian, isset;
959: #endif
961: PetscFunctionBegin;
962: #if defined(PETSC_USE_COMPLEX)
963: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
964: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
965: #endif
966: PetscCall(MatSeqAIJGetArrayRead(A, &av));
967: ai = aa->i;
968: aj = aa->j;
969: adiag = aa->diag;
970: PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
971: if (reuse == MAT_INITIAL_MATRIX) {
972: /* count nz in the upper triangular part of A */
973: nz = 0;
974: if (missing) {
975: for (i = 0; i < M; i++) {
976: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
977: for (j = ai[i]; j < ai[i + 1]; j++) {
978: if (aj[j] < i) continue;
979: nz++;
980: }
981: } else {
982: nz += ai[i + 1] - adiag[i];
983: }
984: }
985: } else {
986: for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
987: }
988: PetscCall(PetscMalloc2(nz, &row, nz, &col));
989: PetscCall(PetscMalloc1(nz, &val));
990: mumps->nnz = nz;
991: mumps->irn = row;
992: mumps->jcn = col;
993: mumps->val = mumps->val_alloc = val;
995: nz = 0;
996: if (missing) {
997: for (i = 0; i < M; i++) {
998: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
999: for (j = ai[i]; j < ai[i + 1]; j++) {
1000: if (aj[j] < i) continue;
1001: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1002: PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1003: val[nz] = av[j];
1004: nz++;
1005: }
1006: } else {
1007: rnz = ai[i + 1] - adiag[i];
1008: ajj = aj + adiag[i];
1009: v1 = av + adiag[i];
1010: for (j = 0; j < rnz; j++) {
1011: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1012: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1013: val[nz++] = v1[j];
1014: }
1015: }
1016: }
1017: } else {
1018: for (i = 0; i < M; i++) {
1019: rnz = ai[i + 1] - adiag[i];
1020: ajj = aj + adiag[i];
1021: v1 = av + adiag[i];
1022: for (j = 0; j < rnz; j++) {
1023: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1024: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1025: val[nz++] = v1[j];
1026: }
1027: }
1028: }
1029: } else {
1030: nz = 0;
1031: val = mumps->val;
1032: if (missing) {
1033: for (i = 0; i < M; i++) {
1034: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1035: for (j = ai[i]; j < ai[i + 1]; j++) {
1036: if (aj[j] < i) continue;
1037: val[nz++] = av[j];
1038: }
1039: } else {
1040: rnz = ai[i + 1] - adiag[i];
1041: v1 = av + adiag[i];
1042: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1043: }
1044: }
1045: } else {
1046: for (i = 0; i < M; i++) {
1047: rnz = ai[i + 1] - adiag[i];
1048: v1 = av + adiag[i];
1049: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1050: }
1051: }
1052: }
1053: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1054: PetscFunctionReturn(PETSC_SUCCESS);
1055: }
1057: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1058: {
1059: const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1060: PetscInt bs;
1061: PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB;
1062: PetscMUMPSInt *row, *col;
1063: const PetscScalar *av, *bv, *v1, *v2;
1064: PetscScalar *val;
1065: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
1066: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data;
1067: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1068: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
1069: #if defined(PETSC_USE_COMPLEX)
1070: PetscBool hermitian, isset;
1071: #endif
1073: PetscFunctionBegin;
1074: #if defined(PETSC_USE_COMPLEX)
1075: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1076: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1077: #endif
1078: PetscCall(MatGetBlockSize(A, &bs));
1079: rstart = A->rmap->rstart;
1080: ai = aa->i;
1081: aj = aa->j;
1082: bi = bb->i;
1083: bj = bb->j;
1084: av = aa->a;
1085: bv = bb->a;
1087: garray = mat->garray;
1089: if (reuse == MAT_INITIAL_MATRIX) {
1090: nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1091: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1092: PetscCall(PetscMalloc1(nz, &val));
1093: /* cannot determine the exact mumps->nnz yet because of the SBAIJ upper-triangular filtering */
1094: mumps->irn = row;
1095: mumps->jcn = col;
1096: mumps->val = mumps->val_alloc = val;
1097: } else {
1098: val = mumps->val;
1099: }
1101: jj = 0;
1102: irow = rstart;
1103: for (i = 0; i < mbs; i++) {
1104: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1105: countA = ai[i + 1] - ai[i];
1106: countB = bi[i + 1] - bi[i];
1107: bjj = bj + bi[i];
1108: v1 = av + ai[i] * bs2;
1109: v2 = bv + bi[i] * bs2;
1111: if (bs > 1) {
1112: /* A-part */
1113: for (j = 0; j < countA; j++) {
1114: for (k = 0; k < bs; k++) {
1115: for (m = 0; m < bs; m++) {
1116: if (rstart + ajj[j] * bs > irow || k >= m) {
1117: if (reuse == MAT_INITIAL_MATRIX) {
1118: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1119: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1120: }
1121: val[jj++] = v1[j * bs2 + m + k * bs];
1122: }
1123: }
1124: }
1125: }
1127: /* B-part */
1128: for (j = 0; j < countB; j++) {
1129: for (k = 0; k < bs; k++) {
1130: for (m = 0; m < bs; m++) {
1131: if (reuse == MAT_INITIAL_MATRIX) {
1132: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1133: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1134: }
1135: val[jj++] = v2[j * bs2 + m + k * bs];
1136: }
1137: }
1138: }
1139: } else {
1140: /* A-part */
1141: for (j = 0; j < countA; j++) {
1142: if (reuse == MAT_INITIAL_MATRIX) {
1143: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1144: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1145: }
1146: val[jj++] = v1[j];
1147: }
1149: /* B-part */
1150: for (j = 0; j < countB; j++) {
1151: if (reuse == MAT_INITIAL_MATRIX) {
1152: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1153: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1154: }
1155: val[jj++] = v2[j];
1156: }
1157: }
1158: irow += bs;
1159: }
1160: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1161: PetscFunctionReturn(PETSC_SUCCESS);
1162: }
1164: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1165: {
1166: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1167: PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB;
1168: PetscMUMPSInt *row, *col;
1169: const PetscScalar *av, *bv, *v1, *v2;
1170: PetscScalar *val;
1171: Mat Ad, Ao;
1172: Mat_SeqAIJ *aa;
1173: Mat_SeqAIJ *bb;
1175: PetscFunctionBegin;
1176: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1177: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1178: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1180: aa = (Mat_SeqAIJ *)Ad->data;
1181: bb = (Mat_SeqAIJ *)Ao->data;
1182: ai = aa->i;
1183: aj = aa->j;
1184: bi = bb->i;
1185: bj = bb->j;
1187: rstart = A->rmap->rstart;
1188: cstart = A->cmap->rstart;
1190: if (reuse == MAT_INITIAL_MATRIX) {
1191: nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1192: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1193: PetscCall(PetscMalloc1(nz, &val));
1194: mumps->nnz = nz;
1195: mumps->irn = row;
1196: mumps->jcn = col;
1197: mumps->val = mumps->val_alloc = val;
1198: } else {
1199: val = mumps->val;
1200: }
1202: jj = 0;
1203: irow = rstart;
1204: for (i = 0; i < m; i++) {
1205: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1206: countA = ai[i + 1] - ai[i];
1207: countB = bi[i + 1] - bi[i];
1208: bjj = bj + bi[i];
1209: v1 = av + ai[i];
1210: v2 = bv + bi[i];
1212: /* A-part */
1213: for (j = 0; j < countA; j++) {
1214: if (reuse == MAT_INITIAL_MATRIX) {
1215: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1216: PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1217: }
1218: val[jj++] = v1[j];
1219: }
1221: /* B-part */
1222: for (j = 0; j < countB; j++) {
1223: if (reuse == MAT_INITIAL_MATRIX) {
1224: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1225: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1226: }
1227: val[jj++] = v2[j];
1228: }
1229: irow++;
1230: }
1231: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1232: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1233: PetscFunctionReturn(PETSC_SUCCESS);
1234: }
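// An illustrative note on the distributed layout (hypothetical 2-rank example): each rank emits triples only for
// its own rows. Columns of the diagonal block are local, hence offset by cstart; columns of the off-diagonal
// block are compressed, hence mapped back to global indices through garray[]. For instance, on a rank with
// rstart = cstart = 2 and garray = {0, 5}, the diagonal entry (local row 0, local col 1) becomes the triple
// (3, 4) with shift = 1, and an off-diagonal entry with bjj[j] = 1 in the same row becomes (3, 6).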
1236: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1237: {
1238: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
1239: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data;
1240: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1241: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1242: const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1243: const PetscInt bs2 = mat->bs2;
1244: PetscInt bs;
1245: PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx;
1246: PetscMUMPSInt *row, *col;
1247: const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1248: PetscScalar *val;
1250: PetscFunctionBegin;
1251: PetscCall(MatGetBlockSize(A, &bs));
1252: if (reuse == MAT_INITIAL_MATRIX) {
1253: nz = bs2 * (aa->nz + bb->nz);
1254: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1255: PetscCall(PetscMalloc1(nz, &val));
1256: mumps->nnz = nz;
1257: mumps->irn = row;
1258: mumps->jcn = col;
1259: mumps->val = mumps->val_alloc = val;
1260: } else {
1261: val = mumps->val;
1262: }
1264: jj = 0;
1265: irow = rstart;
1266: for (i = 0; i < mbs; i++) {
1267: countA = ai[i + 1] - ai[i];
1268: countB = bi[i + 1] - bi[i];
1269: ajj = aj + ai[i];
1270: bjj = bj + bi[i];
1271: v1 = av + bs2 * ai[i];
1272: v2 = bv + bs2 * bi[i];
1274: idx = 0;
1275: /* A-part */
1276: for (k = 0; k < countA; k++) {
1277: for (j = 0; j < bs; j++) {
1278: for (n = 0; n < bs; n++) {
1279: if (reuse == MAT_INITIAL_MATRIX) {
1280: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1281: PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1282: }
1283: val[jj++] = v1[idx++];
1284: }
1285: }
1286: }
1288: idx = 0;
1289: /* B-part */
1290: for (k = 0; k < countB; k++) {
1291: for (j = 0; j < bs; j++) {
1292: for (n = 0; n < bs; n++) {
1293: if (reuse == MAT_INITIAL_MATRIX) {
1294: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1295: PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1296: }
1297: val[jj++] = v2[idx++];
1298: }
1299: }
1300: }
1301: irow += bs;
1302: }
1303: PetscFunctionReturn(PETSC_SUCCESS);
1304: }
1306: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1307: {
1308: const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1309: PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1310: PetscMUMPSInt *row, *col;
1311: const PetscScalar *av, *bv, *v1, *v2;
1312: PetscScalar *val;
1313: Mat Ad, Ao;
1314: Mat_SeqAIJ *aa;
1315: Mat_SeqAIJ *bb;
1316: #if defined(PETSC_USE_COMPLEX)
1317: PetscBool hermitian, isset;
1318: #endif
1320: PetscFunctionBegin;
1321: #if defined(PETSC_USE_COMPLEX)
1322: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1323: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1324: #endif
1325: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1326: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1327: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1329: aa = (Mat_SeqAIJ *)Ad->data;
1330: bb = (Mat_SeqAIJ *)Ao->data;
1331: ai = aa->i;
1332: aj = aa->j;
1333: adiag = aa->diag;
1334: bi = bb->i;
1335: bj = bb->j;
1337: rstart = A->rmap->rstart;
1339: if (reuse == MAT_INITIAL_MATRIX) {
1340: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1341: nzb = 0; /* num of upper triangular entries in mat->B */
1342: for (i = 0; i < m; i++) {
1343: nza += (ai[i + 1] - adiag[i]);
1344: countB = bi[i + 1] - bi[i];
1345: bjj = bj + bi[i];
1346: for (j = 0; j < countB; j++) {
1347: if (garray[bjj[j]] > rstart) nzb++;
1348: }
1349: }
1351: nz = nza + nzb; /* total nz of upper triangular part of mat */
1352: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1353: PetscCall(PetscMalloc1(nz, &val));
1354: mumps->nnz = nz;
1355: mumps->irn = row;
1356: mumps->jcn = col;
1357: mumps->val = mumps->val_alloc = val;
1358: } else {
1359: val = mumps->val;
1360: }
1362: jj = 0;
1363: irow = rstart;
1364: for (i = 0; i < m; i++) {
1365: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1366: v1 = av + adiag[i];
1367: countA = ai[i + 1] - adiag[i];
1368: countB = bi[i + 1] - bi[i];
1369: bjj = bj + bi[i];
1370: v2 = bv + bi[i];
1372: /* A-part */
1373: for (j = 0; j < countA; j++) {
1374: if (reuse == MAT_INITIAL_MATRIX) {
1375: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1376: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1377: }
1378: val[jj++] = v1[j];
1379: }
1381: /* B-part */
1382: for (j = 0; j < countB; j++) {
1383: if (garray[bjj[j]] > rstart) {
1384: if (reuse == MAT_INITIAL_MATRIX) {
1385: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1386: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1387: }
1388: val[jj++] = v2[j];
1389: }
1390: }
1391: irow++;
1392: }
1393: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1394: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1395: PetscFunctionReturn(PETSC_SUCCESS);
1396: }
1398: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1399: {
1400: const PetscScalar *av;
1401: const PetscInt M = A->rmap->n;
1402: PetscCount i;
1403: PetscMUMPSInt *row, *col;
1404: Vec v;
1406: PetscFunctionBegin;
1407: PetscCall(MatDiagonalGetDiagonal(A, &v));
1408: PetscCall(VecGetArrayRead(v, &av));
1409: if (reuse == MAT_INITIAL_MATRIX) {
1410: PetscCall(PetscMalloc2(M, &row, M, &col));
1411: for (i = 0; i < M; i++) {
1412: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1413: col[i] = row[i];
1414: }
1415: mumps->val = (PetscScalar *)av;
1416: mumps->irn = row;
1417: mumps->jcn = col;
1418: mumps->nnz = M;
1419: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1420: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1421: PetscCall(VecRestoreArrayRead(v, &av));
1422: PetscFunctionReturn(PETSC_SUCCESS);
1423: }
1425: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1426: {
1427: PetscScalar *v;
1428: const PetscInt m = A->rmap->n, N = A->cmap->N;
1429: PetscInt lda;
1430: PetscCount i, j;
1431: PetscMUMPSInt *row, *col;
1433: PetscFunctionBegin;
1434: PetscCall(MatDenseGetArray(A, &v));
1435: PetscCall(MatDenseGetLDA(A, &lda));
1436: if (reuse == MAT_INITIAL_MATRIX) {
1437: PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1438: for (i = 0; i < m; i++) {
1439: col[i] = 0;
1440: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1441: }
1442: for (j = 1; j < N; j++) {
1443: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1444: PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1445: }
1446: if (lda == m) mumps->val = v;
1447: else {
1448: PetscCall(PetscMalloc1(m * N, &mumps->val));
1449: mumps->val_alloc = mumps->val;
1450: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1451: }
1452: mumps->irn = row;
1453: mumps->jcn = col;
1454: mumps->nnz = m * N;
1455: } else {
1456: if (lda == m && !mumps->nest_vals) mumps->val = v;
1457: else {
1458: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1459: }
1460: }
1461: PetscCall(MatDenseRestoreArray(A, &v));
1462: PetscFunctionReturn(PETSC_SUCCESS);
1463: }
1465: // If the input Mat (sub) is either a MATTRANSPOSEVIRTUAL or a MATHERMITIANTRANSPOSEVIRTUAL, this function walks up to the parent Mat until the parent is
1466: // no longer of one of these two types, and returns the accumulated shift and scaling, whether the parent Mat should be conjugated,
1467: // and whether the roles of its rows and columns should be swapped
1468: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
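// A minimal caller-side sketch (hypothetical usage, not code taken from this file): unwrapping a MATHERMITIANTRANSPOSEVIRTUAL that
// itself wraps a MATTRANSPOSEVIRTUAL leaves the Mat pointer at the innermost Mat, toggles swap twice (back to PETSC_FALSE),
// toggles conjugate once, and accumulates any MatShell shift/scale into vshift/vscale:
//
//   Mat         inner     = sub;
//   PetscBool   conjugate = PETSC_FALSE, swap = PETSC_FALSE;
//   PetscScalar vshift = 0.0, vscale = 1.0;
//   PetscCall(MatGetTranspose_TransposeVirtual(&inner, &conjugate, &vshift, &vscale, &swap));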
1469: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1470: {
1471: Mat A;
1472: PetscScalar s[2];
1473: PetscBool isTrans, isHTrans, compare;
1475: PetscFunctionBegin;
1476: do {
1477: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1478: if (isTrans) {
1479: PetscCall(MatTransposeGetMat(*sub, &A));
1480: isHTrans = PETSC_FALSE;
1481: } else {
1482: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1483: if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1484: }
1485: compare = (PetscBool)(isTrans || isHTrans);
1486: if (compare) {
1487: if (vshift && vscale) {
1488: PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1489: if (!*conjugate) {
1490: *vshift += s[0] * *vscale;
1491: *vscale *= s[1];
1492: } else {
1493: *vshift += PetscConj(s[0]) * *vscale;
1494: *vscale *= PetscConj(s[1]);
1495: }
1496: }
1497: if (swap) *swap = (PetscBool)!*swap;
1498: if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1499: *sub = A;
1500: }
1501: } while (compare);
1502: PetscFunctionReturn(PETSC_SUCCESS);
1503: }
1505: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1506: {
1507: Mat **mats;
1508: PetscInt nr, nc;
1509: PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1511: PetscFunctionBegin;
1512: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1513: if (reuse == MAT_INITIAL_MATRIX) {
1514: PetscMUMPSInt *irns, *jcns;
1515: PetscScalar *vals;
1516: PetscCount totnnz, cumnnz, maxnnz;
1517: PetscInt *pjcns_w, Mbs = 0;
1518: IS *rows, *cols;
1519: PetscInt **rows_idx, **cols_idx;
1521: cumnnz = 0;
1522: maxnnz = 0;
1523: PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1524: for (PetscInt r = 0; r < nr; r++) {
1525: for (PetscInt c = 0; c < nc; c++) {
1526: Mat sub = mats[r][c];
1528: mumps->nest_convert_to_triples[r * nc + c] = NULL;
1529: if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1530: if (sub) {
1531: PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1532: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1533: MatInfo info;
1535: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1536: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1537: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1538: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1539: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1540: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1541: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1542: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1543: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1545: if (chol) {
1546: if (r == c) {
1547: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1548: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1549: else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1550: else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1551: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1552: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1553: } else {
1554: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1555: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1556: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1557: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1558: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560: }
1561: } else {
1562: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1563: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1564: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1565: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1566: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1567: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1568: }
1569: PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1570: mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1571: PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1572: cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1573: maxnnz = PetscMax(maxnnz, info.nz_used);
1574: }
1575: }
1576: }
1578: /* Allocate total COO */
1579: totnnz = cumnnz;
1580: PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1581: PetscCall(PetscMalloc1(totnnz, &vals));
1583: /* Handle row and column maps
1584: We directly map rows and use an SF for the columns */
1585: PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1586: PetscCall(MatNestGetISs(A, rows, cols));
1587: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1588: for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1589: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1590: else (void)maxnnz;
1592: cumnnz = 0;
1593: for (PetscInt r = 0; r < nr; r++) {
1594: for (PetscInt c = 0; c < nc; c++) {
1595: Mat sub = mats[r][c];
1596: const PetscInt *ridx = rows_idx[r];
1597: const PetscInt *cidx = cols_idx[c];
1598: PetscScalar vscale = 1.0, vshift = 0.0;
1599: PetscInt rst, size, bs;
1600: PetscSF csf;
1601: PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1602: PetscLayout cmap;
1603: PetscInt innz;
1605: mumps->nest_vals_start[r * nc + c] = cumnnz;
1606: if (c == r) {
1607: PetscCall(ISGetSize(rows[r], &size));
1608: if (!mumps->nest_convert_to_triples[r * nc + c]) {
1609: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // the diagonal Mat is NULL, so pick the first non-NULL Mat in the current row
1610: }
1611: PetscCall(MatGetBlockSize(sub, &bs));
1612: Mbs += size / bs;
1613: }
1614: if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1616: /* Extract inner blocks if needed */
1617: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1618: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1620: /* Get column layout to map off-process columns */
1621: PetscCall(MatGetLayouts(sub, NULL, &cmap));
1623: /* Get row start to map on-process rows */
1624: PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1626: /* Directly use the MUMPS data structure and C ordering for now */
1627: PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1629: /* Swap the roles of the row and column indices for transposed blocks
1630: since we need values in the final global ordering */
1631: if (swap) {
1632: cidx = rows_idx[r];
1633: ridx = cols_idx[c];
1634: }
1636: /* Communicate column indices
1637: This could have been done with a single SF, but it would have complicated the code a lot.
1638: Since we do it only once, we accept the cost of setting up an SF for each block */
1639: if (PetscDefined(USE_64BIT_INDICES)) {
1640: for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1641: } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings in 64-bit integer builds */
1642: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1643: PetscCall(PetscIntCast(mumps->nnz, &innz));
1644: PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1645: PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1646: PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1647: PetscCall(PetscSFDestroy(&csf));
1649: /* Import indices: use direct map for rows and mapped indices for columns */
1650: if (swap) {
1651: for (PetscInt k = 0; k < mumps->nnz; k++) {
1652: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1653: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1654: }
1655: } else {
1656: for (PetscInt k = 0; k < mumps->nnz; k++) {
1657: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1658: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1659: }
1660: }
1662: /* Import values to full COO */
1663: if (conjugate) { /* conjugate the entries */
1664: PetscScalar *v = vals + cumnnz;
1665: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1666: } else if (vscale != 1.0) {
1667: PetscScalar *v = vals + cumnnz;
1668: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1669: } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1671: /* Shift new starting point and sanity check */
1672: cumnnz += mumps->nnz;
1673: PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1675: /* Free scratch memory */
1676: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1677: PetscCall(PetscFree(mumps->val_alloc));
1678: mumps->val = NULL;
1679: mumps->nnz = 0;
1680: }
1681: }
1682: if (mumps->id.ICNTL(15) == 1) {
1683: if (Mbs != A->rmap->N) {
1684: PetscMPIInt rank, size;
1686: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1687: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1688: if (rank == 0) {
1689: PetscInt shift = 0;
1691: PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1692: PetscCall(PetscFree(mumps->id.blkptr));
1693: PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1694: mumps->id.blkptr[0] = 1;
1695: for (PetscInt i = 0; i < size; ++i) {
1696: for (PetscInt r = 0; r < nr; r++) {
1697: Mat sub = mats[r][r];
1698: const PetscInt *ranges;
1699: PetscInt bs;
1701: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // the diagonal Mat is NULL, so pick the first non-NULL Mat in the current row
1702: PetscCall(MatGetOwnershipRanges(sub, &ranges));
1703: PetscCall(MatGetBlockSize(sub, &bs));
1704: for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1705: shift += (ranges[i + 1] - ranges[i]) / bs;
1706: }
1707: }
1708: }
1709: } else mumps->id.ICNTL(15) = 0;
1710: }
1711: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1712: for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1713: for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1714: PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1715: if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1716: mumps->nest_vals_start[nr * nc] = cumnnz;
1718: /* Set pointers for final MUMPS data structure */
1719: mumps->nest_vals = vals;
1720: mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated in the OMP code path */
1721: mumps->val = vals;
1722: mumps->irn = irns;
1723: mumps->jcn = jcns;
1724: mumps->nnz = cumnnz;
1725: } else {
1726: PetscScalar *oval = mumps->nest_vals;
1727: for (PetscInt r = 0; r < nr; r++) {
1728: for (PetscInt c = 0; c < nc; c++) {
1729: PetscBool conjugate = PETSC_FALSE;
1730: Mat sub = mats[r][c];
1731: PetscScalar vscale = 1.0, vshift = 0.0;
1732: PetscInt midx = r * nc + c;
1734: if (!mumps->nest_convert_to_triples[midx]) continue;
1735: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1736: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1737: mumps->val = oval + mumps->nest_vals_start[midx];
1738: PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1739: if (conjugate) {
1740: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1741: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1742: } else if (vscale != 1.0) {
1743: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1744: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1745: }
1746: }
1747: }
1748: mumps->val = oval;
1749: }
1750: PetscFunctionReturn(PETSC_SUCCESS);
1751: }
1753: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1754: {
1755: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1757: PetscFunctionBegin;
1758: PetscCall(PetscFree(mumps->id.isol_loc));
1759: PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1760: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1761: PetscCall(VecDestroy(&mumps->b_seq));
1762: PetscCall(VecDestroy(&mumps->x_seq));
1763: PetscCall(PetscFree(mumps->id.perm_in));
1764: PetscCall(PetscFree(mumps->id.blkvar));
1765: PetscCall(PetscFree(mumps->id.blkptr));
1766: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1767: PetscCall(PetscFree(mumps->val_alloc));
1768: PetscCall(PetscFree(mumps->info));
1769: PetscCall(PetscFree(mumps->ICNTL_pre));
1770: PetscCall(PetscFree(mumps->CNTL_pre));
1771: PetscCall(MatMumpsResetSchur_Private(mumps));
1772: if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1773: mumps->id.job = JOB_END;
1774: PetscMUMPS_c(mumps);
1775: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1776: if (mumps->mumps_comm != MPI_COMM_NULL) {
1777: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1778: else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1779: }
1780: }
1781: PetscCall(MatMumpsFreeInternalID(&mumps->id));
1782: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1783: if (mumps->use_petsc_omp_support) {
1784: PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1785: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1786: PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1787: }
1788: #endif
1789: PetscCall(PetscFree(mumps->ia_alloc));
1790: PetscCall(PetscFree(mumps->ja_alloc));
1791: PetscCall(PetscFree(mumps->recvcount));
1792: PetscCall(PetscFree(mumps->reqs));
1793: PetscCall(PetscFree(mumps->irhs_loc));
1794: PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1795: PetscCall(PetscFree(mumps->nest_vals));
1796: PetscCall(PetscFree(A->data));
1798: /* clear composed functions */
1799: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1800: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1801: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1802: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1803: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1804: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1805: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1806: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1807: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1808: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1809: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1810: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1811: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1812: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1813: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1814: PetscFunctionReturn(PETSC_SUCCESS);
1815: }
1817: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of right-hand sides. <array> points to the start of the RHS entries owned by the local process. */
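/* A small worked example (hypothetical sizes): with two MPI ranks owning rows [0,3) and [3,5) of A and no PETSc-OpenMP grouping,
   rank 0 fills irhs_loc = {1,2,3} and rank 1 fills irhs_loc = {4,5} (the owned global rows in MUMPS' 1-based convention),
   while rhs_loc holds the corresponding local rows of the dense RHS, stored column by column when nrhs > 1. */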
1818: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1819: {
1820: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1821: const PetscMPIInt ompsize = mumps->omp_comm_size;
1822: PetscInt i, m, M, rstart;
1824: PetscFunctionBegin;
1825: PetscCall(MatGetSize(A, &M, NULL));
1826: PetscCall(MatGetLocalSize(A, &m, NULL));
1827: PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1828: if (ompsize == 1) {
1829: if (!mumps->irhs_loc) {
1830: mumps->nloc_rhs = (PetscMUMPSInt)m;
1831: PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1832: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1833: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1834: }
1835: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1836: } else {
1837: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1838: const PetscInt *ranges;
1839: PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks;
1840: MPI_Group petsc_group, omp_group;
1841: PetscScalar *recvbuf = NULL;
1843: if (mumps->is_omp_master) {
1844: /* Lazily initialize the omp stuff for distributed rhs */
1845: if (!mumps->irhs_loc) {
1846: PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1847: PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1848: PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1849: PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1850: for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1851: PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1853: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1854: mumps->nloc_rhs = 0;
1855: PetscCall(MatGetOwnershipRanges(A, &ranges));
1856: for (j = 0; j < ompsize; j++) {
1857: mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1858: mumps->nloc_rhs += mumps->rhs_nrow[j];
1859: }
1860: PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1861: for (j = k = 0; j < ompsize; j++) {
1862: for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1863: }
1865: PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1866: PetscCallMPI(MPI_Group_free(&petsc_group));
1867: PetscCallMPI(MPI_Group_free(&omp_group));
1868: }
1870: /* Realloc buffers when current nrhs is bigger than what we have met */
1871: if (nrhs > mumps->max_nrhs) {
1872: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1873: PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1874: mumps->max_nrhs = nrhs;
1875: }
1877: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1878: for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1879: mumps->rhs_disps[0] = 0;
1880: for (j = 1; j < ompsize; j++) {
1881: mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1882: PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1883: }
1884: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1885: }
1887: PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1888: PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1890: if (mumps->is_omp_master) {
1891: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1892: PetscScalar *dst, *dstbase = mumps->rhs_loc;
1893: for (j = 0; j < ompsize; j++) {
1894: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1895: dst = dstbase;
1896: for (i = 0; i < nrhs; i++) {
1897: PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1898: src += mumps->rhs_nrow[j];
1899: dst += mumps->nloc_rhs;
1900: }
1901: dstbase += mumps->rhs_nrow[j];
1902: }
1903: }
1904: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1905: }
1906: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1907: }
1908: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
1909: mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1910: mumps->id.lrhs_loc = mumps->nloc_rhs;
1911: mumps->id.irhs_loc = mumps->irhs_loc;
1912: PetscFunctionReturn(PETSC_SUCCESS);
1913: }
1915: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1916: {
1917: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1918: const PetscScalar *barray = NULL;
1919: PetscScalar *array;
1920: IS is_iden, is_petsc;
1921: PetscInt i;
1922: PetscBool second_solve = PETSC_FALSE;
1923: static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1925: PetscFunctionBegin;
1926: PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
1927: "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
1928: &cite1));
1929: PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
1930: "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
1931: &cite2));
1933: PetscCall(VecFlag(x, A->factorerrortype));
1934: if (A->factorerrortype) {
1935: PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1936: PetscFunctionReturn(PETSC_SUCCESS);
1937: }
1939: mumps->id.nrhs = 1;
1940: if (mumps->petsc_size > 1) {
1941: if (mumps->ICNTL20 == 10) {
1942: mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1943: PetscCall(VecGetArrayRead(b, &barray));
1944: PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1945: } else {
1946: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */
1947: PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1948: PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1949: if (!mumps->myid) {
1950: PetscCall(VecGetArray(mumps->b_seq, &array));
1951: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1952: }
1953: }
1954: } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1955: mumps->id.ICNTL(20) = 0;
1956: PetscCall(VecCopy(b, x));
1957: PetscCall(VecGetArray(x, &array));
1958: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959: }
1961: /*
1962: handle condensation step of Schur complement (if any)
1963: We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1964: According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1965: Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1966: This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1967: */
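/*
   A hedged sketch of how this Schur path is typically reached from user code (assumes an IS `is` listing the Schur variables;
   not code from this file):

     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatFactorSetSchurIS(F, is);  // gives id.size_schur > 0, with ICNTL(26) defaulting to -1
     // ... symbolic and numeric factorization ...
     MatSolve(F, b, x);           // takes the condensation + expansion branch below
*/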
1968: if (mumps->id.size_schur > 0) {
1969: PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1970: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1971: second_solve = PETSC_TRUE;
1972: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1973: mumps->id.ICNTL(26) = 1; /* condensation phase */
1974: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1975: }
1977: mumps->id.job = JOB_SOLVE;
1978: PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1979: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1981: /* handle expansion step of Schur complement (if any) */
1982: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1983: else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS: solve the Schur complement system and copy its solution into x at the Schur variables
1984: PetscCall(MatMumpsSolveSchur_Private(A));
1985: for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1986: }
1988: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1989: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1990: /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1991: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1992: }
1993: if (!mumps->scat_sol) { /* create scatter scat_sol */
1994: PetscInt *isol2_loc = NULL;
1995: PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1996: PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1997: for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
1998: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1999: PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2000: PetscCall(ISDestroy(&is_iden));
2001: PetscCall(ISDestroy(&is_petsc));
2002: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2003: }
2005: PetscScalar *xarray;
2006: PetscCall(VecGetArray(mumps->x_seq, &xarray));
2007: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2008: PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2009: PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2010: PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2012: if (mumps->ICNTL20 == 10) { // distributed RHS
2013: PetscCall(VecRestoreArrayRead(b, &barray));
2014: } else if (!mumps->myid) { // centralized RHS
2015: PetscCall(VecRestoreArray(mumps->b_seq, &array));
2016: }
2017: } else {
2018: // id.rhs has the solution in mumps precision
2019: PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2020: PetscCall(VecRestoreArray(x, &array));
2021: }
2023: PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2024: PetscFunctionReturn(PETSC_SUCCESS);
2025: }
2027: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2028: {
2029: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2030: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2032: PetscFunctionBegin;
2033: mumps->id.ICNTL(9) = 0;
2034: PetscCall(MatSolve_MUMPS(A, b, x));
2035: mumps->id.ICNTL(9) = value;
2036: PetscFunctionReturn(PETSC_SUCCESS);
2037: }
2039: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2040: {
2041: Mat Bt = NULL;
2042: PetscBool denseX, denseB, flg, flgT;
2043: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2044: PetscInt i, nrhs, M, nrhsM;
2045: PetscScalar *array;
2046: const PetscScalar *barray;
2047: PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0;
2048: PetscMUMPSInt *isol_loc, *isol_loc_save;
2049: PetscScalar *sol_loc;
2050: void *sol_loc_save;
2051: PetscCount sol_loc_len_save;
2052: IS is_to, is_from;
2053: PetscInt k, proc, j, m, myrstart;
2054: const PetscInt *rstart;
2055: Vec v_mpi, msol_loc;
2056: VecScatter scat_sol;
2057: Vec b_seq;
2058: VecScatter scat_rhs;
2059: PetscScalar *aa;
2060: PetscInt spnr, *ia, *ja;
2061: Mat_MPIAIJ *b = NULL;
2063: PetscFunctionBegin;
2064: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2065: PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2067: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2069: if (denseB) {
2070: PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2071: mumps->id.ICNTL(20) = 0; /* dense RHS */
2072: } else { /* sparse B */
2073: PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2074: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2075: PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2076: PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2077: /* input B is the transpose of the actual RHS matrix,
2078: because MUMPS requires sparse compressed-column storage! See MatMatTransposeSolve_MUMPS() */
2079: PetscCall(MatTransposeGetMat(B, &Bt));
2080: mumps->id.ICNTL(20) = 1; /* sparse RHS */
2081: }
2083: PetscCall(MatGetSize(B, &M, &nrhs));
2084: PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2085: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2086: mumps->id.lrhs = (PetscMUMPSInt)M;
2088: if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2089: PetscScalar *aa;
2090: PetscInt spnr, *ia, *ja;
2091: PetscBool second_solve = PETSC_FALSE;
2093: PetscCall(MatDenseGetArray(X, &array));
2094: if (denseB) {
2095: /* copy B to X */
2096: PetscCall(MatDenseGetArrayRead(B, &barray));
2097: PetscCall(PetscArraycpy(array, barray, nrhsM));
2098: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2099: } else { /* sparse B */
2100: PetscCall(MatSeqAIJGetArray(Bt, &aa));
2101: PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2102: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2103: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2104: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2105: }
2106: PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2108: /* handle condensation step of Schur complement (if any) */
2109: if (mumps->id.size_schur > 0) {
2110: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2111: second_solve = PETSC_TRUE;
2112: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2113: mumps->id.ICNTL(26) = 1; /* condensation phase, i.e., condense the RHS into id.redrhs */
2114: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2115: }
2117: mumps->id.job = JOB_SOLVE;
2118: PetscMUMPS_c(mumps);
2119: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2121: /* handle expansion step of Schur complement (if any) */
2122: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2123: else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS: solve the Schur complement system and copy its solution into X at the Schur variables
2124: PetscCall(MatMumpsSolveSchur_Private(A));
2125: for (j = 0; j < nrhs; ++j)
2126: for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2127: }
2129: if (!denseB) { /* sparse B, restore ia, ja */
2130: PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2131: PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2132: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2133: }
2135: // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2136: PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2137: PetscCall(MatDenseRestoreArray(X, &array));
2138: PetscFunctionReturn(PETSC_SUCCESS);
2139: }
2141: /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2142: PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2144: /* create msol_loc to hold mumps local solution */
2145: isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2146: sol_loc_save = mumps->id.sol_loc;
2147: sol_loc_len_save = mumps->id.sol_loc_len;
2148: mumps->id.isol_loc = NULL; // an init state
2149: mumps->id.sol_loc = NULL;
2150: mumps->id.sol_loc_len = 0;
2152: lsol_loc = mumps->id.lsol_loc;
2153: PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2154: PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2155: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2156: mumps->id.isol_loc = isol_loc;
2158: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2160: if (denseB) {
2161: if (mumps->ICNTL20 == 10) {
2162: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2163: PetscCall(MatDenseGetArrayRead(B, &barray));
2164: PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2165: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2166: PetscCall(MatGetLocalSize(B, &m, NULL));
2167: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2168: } else {
2169: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2170: /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented as an MPI_Gather, resulting in
2171: very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B on rank 0 and then, on rank
2172: 0, re-arrange B into the desired order, which is a local operation.
2173: */
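/* A sketch of that optimization (not implemented here; VecScatterCreateToZero is an existing PETSc API):

     VecScatter tozero;
     Vec        bz;
     VecScatterCreateToZero(v_mpi, &tozero, &bz);
     VecScatterBegin(tozero, v_mpi, bz, INSERT_VALUES, SCATTER_FORWARD);
     VecScatterEnd(tozero, v_mpi, bz, INSERT_VALUES, SCATTER_FORWARD);
     // then, on rank 0 only, permute bz locally into the (column-major, per-rhs) order MUMPS expects
*/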
2175: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2176: /* wrap dense rhs matrix B into a vector v_mpi */
2177: PetscCall(MatGetLocalSize(B, &m, NULL));
2178: PetscCall(MatDenseGetArrayRead(B, &barray));
2179: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2180: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2182: /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2183: if (!mumps->myid) {
2184: PetscInt *idx;
2185: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2186: PetscCall(PetscMalloc1(nrhsM, &idx));
2187: PetscCall(MatGetOwnershipRanges(B, &rstart));
2188: for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2189: for (j = 0; j < nrhs; j++) {
2190: for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2191: }
2192: }
2194: PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2195: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2196: PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2197: } else {
2198: PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2199: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2200: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2201: }
2203: PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2204: PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2205: PetscCall(ISDestroy(&is_to));
2206: PetscCall(ISDestroy(&is_from));
2207: PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2209: if (!mumps->myid) { /* define rhs on the host */
2210: PetscCall(VecGetArrayRead(b_seq, &barray));
2211: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2212: PetscCall(VecRestoreArrayRead(b_seq, &barray));
2213: }
2214: }
2215: } else { /* sparse B */
2216: b = (Mat_MPIAIJ *)Bt->data;
2218: /* wrap dense X into a vector v_mpi */
2219: PetscCall(MatGetLocalSize(X, &m, NULL));
2220: PetscCall(MatDenseGetArrayRead(X, &barray));
2221: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2222: PetscCall(MatDenseRestoreArrayRead(X, &barray));
2224: if (!mumps->myid) {
2225: PetscCall(MatSeqAIJGetArray(b->A, &aa));
2226: PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2227: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2228: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2229: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2230: } else {
2231: mumps->id.irhs_ptr = NULL;
2232: mumps->id.irhs_sparse = NULL;
2233: mumps->id.nz_rhs = 0;
2234: if (mumps->id.rhs_sparse_len) {
2235: PetscCall(PetscFree(mumps->id.rhs_sparse));
2236: mumps->id.rhs_sparse_len = 0;
2237: }
2238: }
2239: }
2241: /* solve phase */
2242: mumps->id.job = JOB_SOLVE;
2243: PetscMUMPS_c(mumps);
2244: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2246: /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2247: PetscCall(MatDenseGetArray(X, &array));
2248: PetscCall(VecPlaceArray(v_mpi, array));
2250: /* create scatter scat_sol */
2251: PetscCall(MatGetOwnershipRanges(X, &rstart));
2252: /* iidx: index for scattering the MUMPS solution to PETSc X */
2254: PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2255: PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2256: for (i = 0; i < lsol_loc; i++) {
2257: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2259: for (proc = 0; proc < mumps->petsc_size; proc++) {
2260: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2261: myrstart = rstart[proc];
2262: k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */
2263: iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */
2264: m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2265: break;
2266: }
2267: }
2269: for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2270: }
2271: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2272: PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2273: PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2274: PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2275: PetscCall(ISDestroy(&is_from));
2276: PetscCall(ISDestroy(&is_to));
2277: PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2278: PetscCall(MatDenseRestoreArray(X, &array));
2280: if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2281: mumps->id.sol_loc_len = 0;
2282: PetscCall(PetscFree(mumps->id.sol_loc));
2283: }
2285: // restore old values
2286: mumps->id.sol_loc = sol_loc_save;
2287: mumps->id.sol_loc_len = sol_loc_len_save;
2288: mumps->id.isol_loc = isol_loc_save;
2290: PetscCall(PetscFree2(sol_loc, isol_loc));
2291: PetscCall(PetscFree(idxx));
2292: PetscCall(VecDestroy(&msol_loc));
2293: PetscCall(VecDestroy(&v_mpi));
2294: if (!denseB) {
2295: if (!mumps->myid) {
2296: b = (Mat_MPIAIJ *)Bt->data;
2297: PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2298: PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2299: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2300: }
2301: } else {
2302: if (mumps->ICNTL20 == 0) {
2303: PetscCall(VecDestroy(&b_seq));
2304: PetscCall(VecScatterDestroy(&scat_rhs));
2305: }
2306: }
2307: PetscCall(VecScatterDestroy(&scat_sol));
2308: PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2309: PetscFunctionReturn(PETSC_SUCCESS);
2310: }
2312: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2313: {
2314: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2315: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2317: PetscFunctionBegin;
2318: mumps->id.ICNTL(9) = 0;
2319: PetscCall(MatMatSolve_MUMPS(A, B, X));
2320: mumps->id.ICNTL(9) = value;
2321: PetscFunctionReturn(PETSC_SUCCESS);
2322: }
2324: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2325: {
2326: PetscBool flg;
2327: Mat B;
2329: PetscFunctionBegin;
2330: PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2331: PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2333: /* Create B=Bt^T that uses Bt's data structure */
2334: PetscCall(MatCreateTranspose(Bt, &B));
2336: PetscCall(MatMatSolve_MUMPS(A, B, X));
2337: PetscCall(MatDestroy(&B));
2338: PetscFunctionReturn(PETSC_SUCCESS);
2339: }
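/* A hedged usage sketch (variable names are illustrative): to solve A X = B with the right-hand side supplied as its transpose
   Bt in AIJ (CSR) form, i.e. in the compressed-column layout MUMPS wants for B:

     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     // ... symbolic and numeric factorization ...
     MatMatTransposeSolve(F, Bt, X);   // dispatches to MatMatTransposeSolve_MUMPS() above
*/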
2341: #if !defined(PETSC_USE_COMPLEX)
2342: /*
2343: input:
2344: F: numeric factor
2345: output:
2346: nneg: total number of negative pivots
2347: nzero: total number of zero pivots
2348: npos: (global dimension of F) - nneg - nzero
2349: */
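/* A hedged usage sketch: after a MUMPS Cholesky factorization, and with the options required by the checks below
   (-mat_mumps_icntl_13 1 in parallel, -mat_mumps_icntl_24 1 when zero pivots are requested):

     PetscInt nneg, nzero, npos;
     MatGetInertia(F, &nneg, &nzero, &npos);
*/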
2350: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2351: {
2352: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2353: PetscMPIInt size;
2355: PetscFunctionBegin;
2356: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2357: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2358: PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
2360: if (nneg) *nneg = mumps->id.INFOG(12);
2361: if (nzero || npos) {
2362: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2363: if (nzero) *nzero = mumps->id.INFOG(28);
2364: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2365: }
2366: PetscFunctionReturn(PETSC_SUCCESS);
2367: }
2368: #endif
2370: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2371: {
2372: PetscMPIInt nreqs;
2373: PetscMUMPSInt *irn, *jcn;
2374: PetscMPIInt count;
2375: PetscCount totnnz, remain;
2376: const PetscInt osize = mumps->omp_comm_size;
2377: PetscScalar *val;
2379: PetscFunctionBegin;
2380: if (osize > 1) {
2381: if (reuse == MAT_INITIAL_MATRIX) {
2382: /* master first gathers counts of nonzeros to receive */
2383: if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2384: PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2386: /* Then each computes number of send/recvs */
2387: if (mumps->is_omp_master) {
2388: /* Start from 1 since self-communication is not done via MPI */
2389: nreqs = 0;
2390: for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2391: } else {
2392: nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2393: }
2394: PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2396: /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
2397: MPI_Gatherv would be enough if it supported counts > 2^31-1. Since it does not, and mumps->nnz
2398: may well exceed 2^31-1, we have to slice the messages. Since omp_comm_size
2399: is very small, the current approach should add no extra overhead compared to MPI_Gatherv.
2400: */
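/* For instance (hypothetical sizes): a rank contributing nnz = 5e9 entries sends ceil(5e9 / 2147483647) = 3 slices per array,
   i.e. 3 * 3 = 9 nonblocking sends for irn, jcn and val together. */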
2401: nreqs = 0; /* counter for actual send/recvs */
2402: if (mumps->is_omp_master) {
2403: totnnz = 0;
2405: for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2406: PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2407: PetscCall(PetscMalloc1(totnnz, &val));
2409: /* Self communication */
2410: PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2411: PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2412: PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2414: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2415: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2416: PetscCall(PetscFree(mumps->val_alloc));
2417: mumps->nnz = totnnz;
2418: mumps->irn = irn;
2419: mumps->jcn = jcn;
2420: mumps->val = mumps->val_alloc = val;
2422: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2423: jcn += mumps->recvcount[0];
2424: val += mumps->recvcount[0];
2426: /* Remote communication */
2427: for (PetscMPIInt i = 1; i < osize; i++) {
2428: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2429: remain = mumps->recvcount[i] - count;
2430: while (count > 0) {
2431: PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2432: PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2433: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2434: irn += count;
2435: jcn += count;
2436: val += count;
2437: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2438: remain -= count;
2439: }
2440: }
2441: } else {
2442: irn = mumps->irn;
2443: jcn = mumps->jcn;
2444: val = mumps->val;
2445: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2446: remain = mumps->nnz - count;
2447: while (count > 0) {
2448: PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2449: PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2450: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2451: irn += count;
2452: jcn += count;
2453: val += count;
2454: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2455: remain -= count;
2456: }
2457: }
2458: } else {
2459: nreqs = 0;
2460: if (mumps->is_omp_master) {
2461: val = mumps->val + mumps->recvcount[0];
2462: for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2463: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2464: remain = mumps->recvcount[i] - count;
2465: while (count > 0) {
2466: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2467: val += count;
2468: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2469: remain -= count;
2470: }
2471: }
2472: } else {
2473: val = mumps->val;
2474: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2475: remain = mumps->nnz - count;
2476: while (count > 0) {
2477: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2478: val += count;
2479: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2480: remain -= count;
2481: }
2482: }
2483: }
2484: PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2485: mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
2486: }
2487: PetscFunctionReturn(PETSC_SUCCESS);
2488: }
2490: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2491: {
2492: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2494: PetscFunctionBegin;
2495: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* presumably: skip the early return only for error -16 with INFO(2) == 0 (empty matrix); the original second conjunct compared INFOG(1) to both -16 and 0 and could never hold */
2496: if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2497: PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2498: PetscFunctionReturn(PETSC_SUCCESS);
2499: }
2501: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2502: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2504: /* numerical factorization phase */
2505: mumps->id.job = JOB_FACTNUMERIC;
2506: if (!mumps->id.ICNTL(18)) { /* A is centralized */
2507: if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2508: } else {
2509: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2510: }
2512: if (F->schur) {
2513: const PetscScalar *array;
2514: MUMPS_INT size = mumps->id.size_schur;
2515: PetscCall(MatDenseGetArrayRead(F->schur, &array));
2516: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2517: PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2518: }
2520: PetscMUMPS_c(mumps);
2521: if (mumps->id.INFOG(1) < 0) {
2522: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2523: if (mumps->id.INFOG(1) == -10) {
2524: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2525: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2526: } else if (mumps->id.INFOG(1) == -13) {
2527: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2528: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2529: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2530: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2531: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2532: } else {
2533: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534: F->factorerrortype = MAT_FACTOR_OTHER;
2535: }
2536: }
2537: PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2539: F->assembled = PETSC_TRUE;
2541: if (F->schur) { /* reset Schur status to unfactored */
2542: #if defined(PETSC_HAVE_CUDA)
2543: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2544: #endif
2545: PetscScalar *array;
2546: PetscCall(MatDenseGetArray(F->schur, &array));
2547: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2548: PetscCall(MatDenseRestoreArray(F->schur, &array));
2549: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2550: mumps->id.ICNTL(19) = 2;
2551: PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2552: }
2553: PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2554: }
2556: /* just to be sure that the ICNTL(19) value returned by MatMumpsGetIcntl() is always consistent */
2557: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2559: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2560: // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2561: // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2562: // which the solution (in SOL_loc) is available on the local processor.
2563: // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2564: // normally have the same contents for each of these calls. The only exception is the case of
2565: // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2566: // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2567: if (mumps->petsc_size > 1) {
2568: PetscInt lsol_loc;
2569: PetscScalar *array;
2571: /* distributed solution; Create x_seq=sol_loc for repeated use */
2572: if (mumps->x_seq) {
2573: PetscCall(VecScatterDestroy(&mumps->scat_sol));
2574: PetscCall(PetscFree(mumps->id.isol_loc));
2575: PetscCall(VecDestroy(&mumps->x_seq));
2576: }
2577: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2578: PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2579: PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2580: PetscCall(VecGetArray(mumps->x_seq, &array));
2581: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2582: PetscCall(VecRestoreArray(mumps->x_seq, &array));
2583: mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2584: }
2585: PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2586: PetscFunctionReturn(PETSC_SUCCESS);
2587: }
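/* Editorial note: a minimal sketch (not part of this source) of how a caller can react to the
   factorization failures recorded above through the generic PETSc API; F, A and the zeroed
   MatFactorInfo are assumed to be set up by the caller.

     MatFactorError err;
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatFactorGetError(F, &err));
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) { /- handle the zero pivot -/ }
     else if (err == MAT_FACTOR_OUTMEMORY)    { /- retry with a larger -mat_mumps_icntl_14 -/ }
*/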
2589: /* Sets MUMPS options from the options database */
2590: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2591: {
2592: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2593: PetscReal cntl;
2594: PetscMUMPSInt icntl = 0, size, *listvar_schur;
2595: PetscInt info[80], i, ninfo = 80, rbs, cbs;
2596: PetscBool flg = PETSC_FALSE;
2597: PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2598: void *arr;
2600: PetscFunctionBegin;
2601: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2602: if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2603: PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2604: PetscInt nthreads = 0;
2605: PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2606: PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2607: PetscMUMPSInt nblk, *blkvar, *blkptr;
2609: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2610: PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2611: PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2613: PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2614: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2615: /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2616: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2617: if (mumps->use_petsc_omp_support) {
2618: PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2619: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2620: PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2621: PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2622: #else
2623: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2624: ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2625: #endif
2626: } else {
2627: mumps->omp_comm = PETSC_COMM_SELF;
2628: mumps->mumps_comm = mumps->petsc_comm;
2629: mumps->is_omp_master = PETSC_TRUE;
2630: }
2631: PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2632: mumps->reqs = NULL;
2633: mumps->tag = 0;
2635: if (mumps->mumps_comm != MPI_COMM_NULL) {
2636: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2637: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2638: MPI_Comm comm;
2639: PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2640: mumps->mumps_comm = comm;
2641: } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2642: }
2644: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2645: mumps->id.job = JOB_INIT;
2646: mumps->id.par = 1; /* host participates in factorization and solve */
2647: mumps->id.sym = mumps->sym;
2649: size = mumps->id.size_schur;
2650: arr = mumps->id.schur;
2651: listvar_schur = mumps->id.listvar_schur;
2652: nblk = mumps->id.nblk;
2653: blkvar = mumps->id.blkvar;
2654: blkptr = mumps->id.blkptr;
2655: if (PetscDefined(USE_DEBUG)) {
2656: for (PetscInt i = 0; i < size; i++)
2657: PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2658: A->rmap->N);
2659: }
2661: PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2662: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2663: PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed since the requested precision differs from that of PetscScalar");
2664: PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2666: PetscMUMPS_c(mumps);
2667: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2669: /* set PETSc-MUMPS default options - override MUMPS default */
2670: mumps->id.ICNTL(3) = 0;
2671: mumps->id.ICNTL(4) = 0;
2672: if (mumps->petsc_size == 1) {
2673: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2674: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
2675: } else {
2676: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2677: mumps->id.ICNTL(21) = 1; /* distributed solution */
2678: }
2679: if (nblk && blkptr) {
2680: mumps->id.ICNTL(15) = 1;
2681: mumps->id.nblk = nblk;
2682: mumps->id.blkvar = blkvar;
2683: mumps->id.blkptr = blkptr;
2684: } else mumps->id.ICNTL(15) = 0;
2686: /* restore cached ICNTL and CNTL values */
2687: for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2688: for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2690: PetscCall(PetscFree(mumps->ICNTL_pre));
2691: PetscCall(PetscFree(mumps->CNTL_pre));
2693: if (schur) {
2694: mumps->id.size_schur = size;
2695: mumps->id.schur_lld = size;
2696: mumps->id.schur = arr;
2697: mumps->id.listvar_schur = listvar_schur;
2698: if (mumps->petsc_size > 1) {
2699: PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2701: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2702: gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2703: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2704: PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported by PETSc");
2705: } else {
2706: if (F->factortype == MAT_FACTOR_LU) {
2707: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2708: } else {
2709: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2710: }
2711: }
2712: mumps->id.ICNTL(26) = -1;
2713: }
2715: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2716: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2717: */
2718: PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2719: PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2721: mumps->scat_rhs = NULL;
2722: mumps->scat_sol = NULL;
2723: }
2724: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2725: if (flg) mumps->id.ICNTL(1) = icntl;
2726: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2727: if (flg) mumps->id.ICNTL(2) = icntl;
2728: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2729: if (flg) mumps->id.ICNTL(3) = icntl;
2731: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2732: if (flg) mumps->id.ICNTL(4) = icntl;
2733: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2735: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2736: if (flg) mumps->id.ICNTL(6) = icntl;
2738: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2739: if (flg) {
2740: PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2741: mumps->id.ICNTL(7) = icntl;
2742: }
2744: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2745: /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2746: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2747: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2748: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2749: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2750: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2751: PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2752: if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2753: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2754: if (flg) {
2755: if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The absolute value of a negative -mat_mumps_icntl_15 must be a multiple of the row and column block sizes");
2756: else if (mumps->id.ICNTL(15) > 0) {
2757: const PetscInt *bsizes;
2758: PetscInt nblocks, p, *blkptr = NULL;
2759: PetscMPIInt *recvcounts, *displs, n;
2760: PetscMPIInt rank, size = 0;
2762: PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2763: flg = PETSC_TRUE;
2764: for (p = 0; p < nblocks; ++p) {
2765: if (bsizes[p] > 1) break;
2766: }
2767: if (p == nblocks) flg = PETSC_FALSE;
2768: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2769: if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2770: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2771: if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2772: PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2773: PetscCall(PetscMPIIntCast(nblocks, &n));
2774: PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2775: for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2776: PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2777: PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2778: PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2779: if (rank == 0) {
2780: blkptr[0] = 1;
2781: for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2782: PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2783: }
2784: PetscCall(PetscFree2(recvcounts, displs));
2785: PetscCall(PetscFree(blkptr));
2786: }
2787: }
2788: }
2789: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2790: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2791: PetscCall(MatDestroy(&F->schur));
2792: PetscCall(MatMumpsResetSchur_Private(mumps));
2793: }
2795: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2796: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2797: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2798: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2799: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2800: In short, distributed RHS could not be used until MPICH v4.0b1, or unless the workaround in MUMPS 5.6.2+ is enabled
2801: */
2802: mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2803: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2804: mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2805: #endif
2806: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2807: PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2808: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2809: PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2810: #endif
2811: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2813: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2814: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2815: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2816: if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
2818: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2819: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2820: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2821: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2822: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2823: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2824: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2825: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
2826: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2827: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2828: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2829: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2830: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2831: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2832: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2833: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2835: PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2836: if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2837: PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2838: if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2839: PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2840: if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2841: PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2842: if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2843: PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2844: if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2845: PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2846: if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2848: PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2850: PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2851: if (ninfo) {
2852: PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2853: PetscCall(PetscMalloc1(ninfo, &mumps->info));
2854: mumps->ninfo = ninfo;
2855: for (i = 0; i < ninfo; i++) {
2856: PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2857: mumps->info[i] = info[i];
2858: }
2859: }
2860: PetscOptionsEnd();
2861: PetscFunctionReturn(PETSC_SUCCESS);
2862: }
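/* Editorial note: a hedged usage sketch (not part of this source). The options registered above are
   typically supplied on the command line together with the solver-package selection, e.g.

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 50 -mat_mumps_cntl_1 0.01 -mat_mumps_icntl_4 2

   "app" is a placeholder executable name; the -pc_* options assume the factorization is driven
   through a KSP/PC as usual, while the -mat_mumps_* options map to the ICNTL/CNTL entries parsed
   in MatSetFromOptions_MUMPS() above. */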
2864: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2865: {
2866: PetscFunctionBegin;
2867: if (mumps->id.INFOG(1) < 0) {
2868: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2869: if (mumps->id.INFOG(1) == -6) {
2870: PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2871: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2872: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2873: PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2874: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2875: } else {
2876: PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2877: F->factorerrortype = MAT_FACTOR_OTHER;
2878: }
2879: }
2880: if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2881: PetscFunctionReturn(PETSC_SUCCESS);
2882: }
2884: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2885: {
2886: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2887: Vec b;
2888: const PetscInt M = A->rmap->N;
2890: PetscFunctionBegin;
2891: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2892: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2893: PetscFunctionReturn(PETSC_SUCCESS);
2894: }
2896: /* Set MUMPS options from the options database */
2897: PetscCall(MatSetFromOptions_MUMPS(F, A));
2899: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2900: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2902: /* analysis phase */
2903: mumps->id.job = JOB_FACTSYMBOLIC;
2904: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2905: switch (mumps->id.ICNTL(18)) {
2906: case 0: /* centralized assembled matrix input */
2907: if (!mumps->myid) {
2908: mumps->id.nnz = mumps->nnz;
2909: mumps->id.irn = mumps->irn;
2910: mumps->id.jcn = mumps->jcn;
2911: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2912: if (r && mumps->id.ICNTL(7) == 7) {
2913: mumps->id.ICNTL(7) = 1;
2914: if (!mumps->myid) {
2915: const PetscInt *idx;
2916: PetscInt i;
2918: PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2919: PetscCall(ISGetIndices(r, &idx));
2920: for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2921: PetscCall(ISRestoreIndices(r, &idx));
2922: }
2923: }
2924: }
2925: break;
2926: case 3: /* distributed assembled matrix input (size>1) */
2927: mumps->id.nnz_loc = mumps->nnz;
2928: mumps->id.irn_loc = mumps->irn;
2929: mumps->id.jcn_loc = mumps->jcn;
2930: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2931: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2932: PetscCall(MatCreateVecs(A, NULL, &b));
2933: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2934: PetscCall(VecDestroy(&b));
2935: }
2936: break;
2937: }
2938: PetscMUMPS_c(mumps);
2939: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2941: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2942: F->ops->solve = MatSolve_MUMPS;
2943: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2944: F->ops->matsolve = MatMatSolve_MUMPS;
2945: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2946: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2948: mumps->matstruc = SAME_NONZERO_PATTERN;
2949: PetscFunctionReturn(PETSC_SUCCESS);
2950: }
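/* Editorial note: a minimal sketch (not part of this source) of the call sequence that reaches the
   symbolic/numeric routines above when driving MUMPS directly through the Mat API; A, b and x are
   assumed to be created and assembled by the caller.

     Mat           F;
     MatFactorInfo info;
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info)); // dispatches to MatLUFactorSymbolic_AIJMUMPS()
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(MatDestroy(&F));
*/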
2952: /* Note the PETSc r and c permutations are ignored */
2953: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2954: {
2955: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2956: Vec b;
2957: const PetscInt M = A->rmap->N;
2959: PetscFunctionBegin;
2960: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2961: /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2962: PetscFunctionReturn(PETSC_SUCCESS);
2963: }
2965: /* Set MUMPS options from the options database */
2966: PetscCall(MatSetFromOptions_MUMPS(F, A));
2968: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2969: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2971: /* analysis phase */
2972: mumps->id.job = JOB_FACTSYMBOLIC;
2973: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2974: switch (mumps->id.ICNTL(18)) {
2975: case 0: /* centralized assembled matrix input */
2976: if (!mumps->myid) {
2977: mumps->id.nnz = mumps->nnz;
2978: mumps->id.irn = mumps->irn;
2979: mumps->id.jcn = mumps->jcn;
2980: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2981: }
2982: break;
2983: case 3: /* distributed assembled matrix input (size>1) */
2984: mumps->id.nnz_loc = mumps->nnz;
2985: mumps->id.irn_loc = mumps->irn;
2986: mumps->id.jcn_loc = mumps->jcn;
2987: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2988: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2989: PetscCall(MatCreateVecs(A, NULL, &b));
2990: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2991: PetscCall(VecDestroy(&b));
2992: }
2993: break;
2994: }
2995: PetscMUMPS_c(mumps);
2996: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2998: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2999: F->ops->solve = MatSolve_MUMPS;
3000: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
3001: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3003: mumps->matstruc = SAME_NONZERO_PATTERN;
3004: PetscFunctionReturn(PETSC_SUCCESS);
3005: }
3007: /* Note the PETSc r permutation and factor info are ignored */
3008: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3009: {
3010: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3011: Vec b;
3012: const PetscInt M = A->rmap->N;
3014: PetscFunctionBegin;
3015: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3016: /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3017: PetscFunctionReturn(PETSC_SUCCESS);
3018: }
3020: /* Set MUMPS options from the options database */
3021: PetscCall(MatSetFromOptions_MUMPS(F, A));
3023: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3024: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3026: /* analysis phase */
3027: mumps->id.job = JOB_FACTSYMBOLIC;
3028: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3029: switch (mumps->id.ICNTL(18)) {
3030: case 0: /* centralized assembled matrix input */
3031: if (!mumps->myid) {
3032: mumps->id.nnz = mumps->nnz;
3033: mumps->id.irn = mumps->irn;
3034: mumps->id.jcn = mumps->jcn;
3035: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3036: }
3037: break;
3038: case 3: /* distributed assembled matrix input (size>1) */
3039: mumps->id.nnz_loc = mumps->nnz;
3040: mumps->id.irn_loc = mumps->irn;
3041: mumps->id.jcn_loc = mumps->jcn;
3042: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3043: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3044: PetscCall(MatCreateVecs(A, NULL, &b));
3045: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3046: PetscCall(VecDestroy(&b));
3047: }
3048: break;
3049: }
3050: PetscMUMPS_c(mumps);
3051: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3053: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3054: F->ops->solve = MatSolve_MUMPS;
3055: F->ops->solvetranspose = MatSolve_MUMPS;
3056: F->ops->matsolve = MatMatSolve_MUMPS;
3057: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
3058: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3059: #if defined(PETSC_USE_COMPLEX)
3060: F->ops->getinertia = NULL;
3061: #else
3062: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3063: #endif
3065: mumps->matstruc = SAME_NONZERO_PATTERN;
3066: PetscFunctionReturn(PETSC_SUCCESS);
3067: }
3069: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3070: {
3071: PetscBool isascii;
3072: PetscViewerFormat format;
3073: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3075: PetscFunctionBegin;
3076: /* check if matrix is mumps type */
3077: if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3079: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3080: if (isascii) {
3081: PetscCall(PetscViewerGetFormat(viewer, &format));
3082: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3083: PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3084: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3085: PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
3086: PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
3087: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
3088: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3089: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
3090: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
3091: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
3092: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
3093: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3094: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
3095: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
3096: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
3097: if (mumps->id.ICNTL(11) > 0) {
3098: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3099: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3100: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3101: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3102: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3103: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3104: }
3105: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
3106: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
3107: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3108: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
3109: /* ICNTL(16) and ICNTL(17) not used */
3110: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
3111: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
3112: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
3113: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
3114: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
3115: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3117: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
3118: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
3119: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
3120: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
3121: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
3122: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));
3124: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
3125: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
3126: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
3127: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
3128: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
3129: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
3130: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
3131: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
3132: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3133: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));
3135: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3136: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3137: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3138: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3139: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3140: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3142: /* information local to each processor */
3143: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3144: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3145: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3146: PetscCall(PetscViewerFlush(viewer));
3147: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3148: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3149: PetscCall(PetscViewerFlush(viewer));
3150: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3151: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3152: PetscCall(PetscViewerFlush(viewer));
3154: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
3155: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3156: PetscCall(PetscViewerFlush(viewer));
3158: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
3159: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3160: PetscCall(PetscViewerFlush(viewer));
3162: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3163: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3164: PetscCall(PetscViewerFlush(viewer));
3166: if (mumps->ninfo && mumps->ninfo <= 80) {
3167: PetscInt i;
3168: for (i = 0; i < mumps->ninfo; i++) {
3169: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3170: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3171: PetscCall(PetscViewerFlush(viewer));
3172: }
3173: }
3174: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3175: } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3177: if (mumps->myid == 0) { /* information from the host */
3178: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3179: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3180: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3181: PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3183: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3184: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3185: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3186: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3187: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3188: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3189: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3190: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3191: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3192: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3193: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3194: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
3195: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3196: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3197: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3198: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3199: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3200: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3201: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3202: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3203: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3204: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3205: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3206: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3207: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3208: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3209: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3210: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3211: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3212: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3213: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3214: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3215: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3216: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3217: }
3218: }
3219: }
3220: PetscFunctionReturn(PETSC_SUCCESS);
3221: }
3223: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3224: {
3225: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3227: PetscFunctionBegin;
3228: info->block_size = 1.0;
3229: info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3230: info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3231: info->nz_unneeded = 0.0;
3232: info->assemblies = 0.0;
3233: info->mallocs = 0.0;
3234: info->memory = 0.0;
3235: info->fill_ratio_given = 0;
3236: info->fill_ratio_needed = 0;
3237: info->factor_mallocs = 0;
3238: PetscFunctionReturn(PETSC_SUCCESS);
3239: }
3241: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3242: {
3243: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3244: const PetscScalar *arr;
3245: const PetscInt *idxs;
3246: PetscInt size, i;
3248: PetscFunctionBegin;
3249: PetscCall(ISGetLocalSize(is, &size));
3250: /* Schur complement matrix */
3251: PetscCall(MatDestroy(&F->schur));
3252: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3253: PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3254: // don't allocate mumps->id.schur[] now as its precision is not yet known
3255: PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3256: PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3257: PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3258: if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3260: /* MUMPS expects Fortran style indices */
3261: PetscCall(PetscFree(mumps->id.listvar_schur));
3262: PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3263: PetscCall(ISGetIndices(is, &idxs));
3264: for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3265: PetscCall(ISRestoreIndices(is, &idxs));
3266: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3267: if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3268: else mumps->ICNTL26 = -1;
3269: PetscFunctionReturn(PETSC_SUCCESS);
3270: }
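/* Editorial note: a hedged sketch (not part of this source) of the intended Schur-complement
   workflow; "is" lists the global indices of the Schur variables and must be supplied before the
   symbolic phase. The three-argument form of MatFactorCreateSchurComplement() is assumed here.

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));                  // dispatches to MatFactorSetSchurIS_MUMPS()
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); // dense copy of the Schur complement
*/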
3272: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3273: {
3274: Mat St;
3275: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3276: PetscScalar *array;
3277: PetscInt i, j, N = mumps->id.size_schur;
3279: PetscFunctionBegin;
3280: PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3281: PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3282: PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3283: PetscCall(MatSetType(St, MATDENSE));
3284: PetscCall(MatSetUp(St));
3285: PetscCall(MatDenseGetArray(St, &array));
3286: if (!mumps->sym) { /* MUMPS always returns a full matrix */
3287: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3288: for (i = 0; i < N; i++) {
3289: for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3290: }
3291: } else { /* stored by columns */
3292: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3293: }
3294: } else { /* either full or lower-triangular (not packed) */
3295: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3296: for (i = 0; i < N; i++) {
3297: for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3298: }
3299: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3300: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3301: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3302: for (i = 0; i < N; i++) {
3303: for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3304: }
3305: }
3306: }
3307: PetscCall(MatDenseRestoreArray(St, &array));
3308: *S = St;
3309: PetscFunctionReturn(PETSC_SUCCESS);
3310: }
3312: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3313: {
3314: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3316: PetscFunctionBegin;
3317: if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3318: PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3319: for (i = 0; i < nICNTL_pre; ++i)
3320: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3321: if (i == nICNTL_pre) { /* not already cached */
3322: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3323: else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3324: mumps->ICNTL_pre[0]++;
3325: }
3326: mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3327: PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3328: } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3329: PetscFunctionReturn(PETSC_SUCCESS);
3330: }
3332: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3333: {
3334: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3336: PetscFunctionBegin;
3337: if (mumps->id.job == JOB_NULL) {
3338: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3339: *ival = 0;
3340: for (i = 0; i < nICNTL_pre; ++i) {
3341: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3342: }
3343: } else *ival = mumps->id.ICNTL(icntl);
3344: PetscFunctionReturn(PETSC_SUCCESS);
3345: }
3347: /*@
3348: MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3350: Logically Collective
3352: Input Parameters:
3353: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3354: . icntl - index of MUMPS parameter array `ICNTL()`
3355: - ival - value of MUMPS `ICNTL(icntl)`
3357: Options Database Key:
3358: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3360: Level: beginner
3362: Note:
3363: Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3365: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3366: @*/
3367: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3368: {
3369: PetscFunctionBegin;
3371: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3374: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3375: PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3376: PetscFunctionReturn(PETSC_SUCCESS);
3377: }
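/* Editorial note: a minimal usage sketch (not part of this source); F is assumed to be a MUMPS
   factor obtained from MatGetFactor(). Setting ICNTL(14) to 50 increases the estimated working
   space by 50%, equivalent to -mat_mumps_icntl_14 50 on the command line.

     PetscCall(MatMumpsSetIcntl(F, 14, 50));
*/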
3379: /*@
3380: MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3382: Logically Collective
3384: Input Parameters:
3385: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3386: - icntl - index of MUMPS parameter array ICNTL()
3388: Output Parameter:
3389: . ival - value of MUMPS ICNTL(icntl)
3391: Level: beginner
3393: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3394: @*/
3395: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3396: {
3397: PetscFunctionBegin;
3399: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3401: PetscAssertPointer(ival, 3);
3402: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3403: PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3404: PetscFunctionReturn(PETSC_SUCCESS);
3405: }
3407: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3408: {
3409: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3411: PetscFunctionBegin;
3412: if (mumps->id.job == JOB_NULL) {
3413: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3414: for (i = 0; i < nCNTL_pre; ++i)
3415: if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3416: if (i == nCNTL_pre) {
3417: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3418: else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3419: mumps->CNTL_pre[0]++;
3420: }
3421: mumps->CNTL_pre[1 + 2 * i] = icntl;
3422: mumps->CNTL_pre[2 + 2 * i] = val;
3423: } else ID_CNTL_SET(mumps->id, icntl, val);
3424: PetscFunctionReturn(PETSC_SUCCESS);
3425: }
3427: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3428: {
3429: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3431: PetscFunctionBegin;
3432: if (mumps->id.job == JOB_NULL) {
3433: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3434: *val = 0.0;
3435: for (i = 0; i < nCNTL_pre; ++i) {
3436: if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3437: }
3438: } else *val = ID_CNTL_GET(mumps->id, icntl);
3439: PetscFunctionReturn(PETSC_SUCCESS);
3440: }
3442: /*@
3443: MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3445: Logically Collective
3447: Input Parameters:
3448: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3449: . icntl - index of MUMPS parameter array `CNTL()`
3450: - val - value of MUMPS `CNTL(icntl)`
3452: Options Database Key:
3453: . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`
3455: Level: beginner
3457: Note:
3458: Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3460: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3461: @*/
3462: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3463: {
3464: PetscFunctionBegin;
3466: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3469: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3470: PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3471: PetscFunctionReturn(PETSC_SUCCESS);
3472: }
3474: /*@
3475: MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3477: Logically Collective
3479: Input Parameters:
3480: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3481: - icntl - index of MUMPS parameter array CNTL()
3483: Output Parameter:
3484: . val - value of MUMPS CNTL(icntl)
3486: Level: beginner
3488: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3489: @*/
3490: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3491: {
3492: PetscFunctionBegin;
3494: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3496: PetscAssertPointer(val, 3);
3497: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3498: PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3499: PetscFunctionReturn(PETSC_SUCCESS);
3500: }
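/*
   A small illustrative sketch combining MatMumpsSetCntl() and MatMumpsGetCntl() (the factor `F` is assumed to come from MatGetFactor()
   with MATSOLVERMUMPS, and the value 0.01 is only an example): set the relative pivoting threshold CNTL(1) before the numeric
   factorization and read it back afterwards.

.vb
     PetscReal cntl1;

     PetscCall(MatMumpsSetCntl(F, 1, 0.01)); // CNTL(1): relative pivoting threshold
     PetscCall(MatMumpsGetCntl(F, 1, &cntl1));
.ve
*/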
3502: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3503: {
3504: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3506: PetscFunctionBegin;
3507: *info = mumps->id.INFO(icntl);
3508: PetscFunctionReturn(PETSC_SUCCESS);
3509: }
3511: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3512: {
3513: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3515: PetscFunctionBegin;
3516: *infog = mumps->id.INFOG(icntl);
3517: PetscFunctionReturn(PETSC_SUCCESS);
3518: }
3520: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3521: {
3522: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3524: PetscFunctionBegin;
3525: *rinfo = ID_RINFO_GET(mumps->id, icntl);
3526: PetscFunctionReturn(PETSC_SUCCESS);
3527: }
3529: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3530: {
3531: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3533: PetscFunctionBegin;
3534: *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3535: PetscFunctionReturn(PETSC_SUCCESS);
3536: }
3538: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3539: {
3540: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3542: PetscFunctionBegin;
3543:   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3544: *size = 0;
3545: *array = NULL;
3546: if (!mumps->myid) {
3547: *size = mumps->id.INFOG(28);
3548: PetscCall(PetscMalloc1(*size, array));
3549: for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3550: }
3551: PetscFunctionReturn(PETSC_SUCCESS);
3552: }
3554: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3555: {
3556: Mat Bt = NULL, Btseq = NULL;
3557: PetscBool flg;
3558: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3559: PetscScalar *aa;
3560: PetscInt spnr, *ia, *ja, M, nrhs;
3562: PetscFunctionBegin;
3563: PetscAssertPointer(spRHS, 2);
3564: PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3565:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3566: PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3567: PetscCall(MatTransposeGetMat(spRHS, &Bt));
3569: PetscCall(MatMumpsSetIcntl(F, 30, 1));
3571: if (mumps->petsc_size > 1) {
3572: Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3573: Btseq = b->A;
3574: } else {
3575: Btseq = Bt;
3576: }
3578: PetscCall(MatGetSize(spRHS, &M, &nrhs));
3579: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3580: PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3581: mumps->id.rhs = NULL;
3583: if (!mumps->myid) {
3584: PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3585: PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3586: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3587: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3588: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3589: } else {
3590: mumps->id.irhs_ptr = NULL;
3591: mumps->id.irhs_sparse = NULL;
3592: mumps->id.nz_rhs = 0;
3593: if (mumps->id.rhs_sparse_len) {
3594: PetscCall(PetscFree(mumps->id.rhs_sparse));
3595: mumps->id.rhs_sparse_len = 0;
3596: }
3597: }
3598: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3599: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3601: /* solve phase */
3602: mumps->id.job = JOB_SOLVE;
3603: PetscMUMPS_c(mumps);
3604: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3606: if (!mumps->myid) {
3607: PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3608: PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3609: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3610: }
3611: PetscFunctionReturn(PETSC_SUCCESS);
3612: }
3614: /*@
3615:   MatMumpsGetInverse - Get a user-specified set of entries of the inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3617: Logically Collective
3619: Input Parameter:
3620: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3622: Output Parameter:
3623: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the requested entries of the inverse of `A`
3625: Level: beginner
3627: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3628: @*/
3629: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3630: {
3631: PetscFunctionBegin;
3633: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3634: PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3635: PetscFunctionReturn(PETSC_SUCCESS);
3636: }
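/*
   A sketch of one way MatMumpsGetInverse() might be used to obtain the diagonal entries of the inverse (illustrative; it assumes `F` is
   an already factored sequential MUMPS factor of an N x N matrix `A`, both names being placeholders): build an AIJ matrix whose nonzero
   pattern marks the requested entries, wrap it with MatCreateTranspose(), and call MatMumpsGetInverse(); on return the values of the
   underlying AIJ matrix hold the requested entries. Alternatively, MatMumpsGetInverseTranspose() accepts the AIJ matrix directly.

.vb
     Mat      Bt, spRHS;
     PetscInt i, N;

     PetscCall(MatGetSize(A, &N, NULL));
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, N, N, 1, NULL, &Bt));
     for (i = 0; i < N; i++) PetscCall(MatSetValue(Bt, i, i, 0.0, INSERT_VALUES)); // request the diagonal of inv(A)
     PetscCall(MatAssemblyBegin(Bt, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(Bt, MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreateTranspose(Bt, &spRHS));
     PetscCall(MatMumpsGetInverse(F, spRHS)); // the values of Bt now contain the requested entries
     PetscCall(MatDestroy(&spRHS));
     PetscCall(MatDestroy(&Bt));
.ve
*/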
3638: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3639: {
3640: Mat spRHS;
3642: PetscFunctionBegin;
3643: PetscCall(MatCreateTranspose(spRHST, &spRHS));
3644: PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3645: PetscCall(MatDestroy(&spRHS));
3646: PetscFunctionReturn(PETSC_SUCCESS);
3647: }
3649: /*@
3650:   MatMumpsGetInverseTranspose - Get a user-specified set of entries of the inverse of the matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3652: Logically Collective
3654: Input Parameter:
3655: . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3657: Output Parameter:
3658: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$
3660: Level: beginner
3662: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3663: @*/
3664: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3665: {
3666: PetscBool flg;
3668: PetscFunctionBegin;
3670: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3671: PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3672:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix");
3673: PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3674: PetscFunctionReturn(PETSC_SUCCESS);
3675: }
3677: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3678: {
3679: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3681: PetscFunctionBegin;
3682: if (nblk) {
3683: PetscAssertPointer(blkptr, 4);
3684: PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3685: PetscCall(PetscFree(mumps->id.blkptr));
3686: PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3687: for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3688: // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3689: // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3690: // mumps->id.ICNTL(15) = 1;
3691: if (blkvar) {
3692: PetscCall(PetscFree(mumps->id.blkvar));
3693: PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3694: for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3695: }
3696: } else {
3697: PetscCall(PetscFree(mumps->id.blkptr));
3698: PetscCall(PetscFree(mumps->id.blkvar));
3699: // mumps->id.ICNTL(15) = 0;
3700: mumps->id.nblk = 0;
3701: }
3702: PetscFunctionReturn(PETSC_SUCCESS);
3703: }
3705: /*@
3706: MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3708: Not collective, only relevant on the first process of the MPI communicator
3710: Input Parameters:
3711: + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3712: . nblk - the number of blocks
3713: . blkvar - see the MUMPS documentation; `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk = 1, nblk`) holds the variables associated with block `iblk`
3714: - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
3716: Level: advanced
3718: .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3719: @*/
3720: PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3721: {
3722: PetscFunctionBegin;
3724: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3725: PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3726: PetscFunctionReturn(PETSC_SUCCESS);
3727: }
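/*
   A sketch of MatMumpsSetBlk() (illustrative): for a hypothetical 5 x 5 factor `F` whose variables form two contiguous blocks of sizes 2
   and 3, `blkptr` follows the 1-based convention described above (an array starting at 1 storing the prefix sum of the block sizes), and
   `blkvar` is passed as NULL, which the implementation above accepts when no explicit variable list is provided.

.vb
     const PetscInt blkptr[] = {1, 3, 6}; // blocks {1,2} and {3,4,5} in 1-based numbering

     PetscCall(MatMumpsSetBlk(F, 2, NULL, blkptr));
.ve
*/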
3729: /*@
3730: MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3732: Logically Collective
3734: Input Parameters:
3735: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3736: - icntl - index of MUMPS parameter array INFO()
3738: Output Parameter:
3739: . ival - value of MUMPS INFO(icntl)
3741: Level: beginner
3743: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3744: @*/
3745: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3746: {
3747: PetscFunctionBegin;
3749: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3750: PetscAssertPointer(ival, 3);
3751: PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3752: PetscFunctionReturn(PETSC_SUCCESS);
3753: }
3755: /*@
3756: MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3758: Logically Collective
3760: Input Parameters:
3761: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3762: - icntl - index of MUMPS parameter array INFOG()
3764: Output Parameter:
3765: . ival - value of MUMPS INFOG(icntl)
3767: Level: beginner
3769: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3770: @*/
3771: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3772: {
3773: PetscFunctionBegin;
3775: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3776: PetscAssertPointer(ival, 3);
3777: PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3778: PetscFunctionReturn(PETSC_SUCCESS);
3779: }
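/*
   A short sketch with MatMumpsGetInfog() (illustrative; `F` is assumed to be a MUMPS factor after a factorization or solve): INFOG(1)
   can be inspected for the MUMPS return code, with negative values indicating an error, as in the checks used throughout this file.

.vb
     PetscInt infog1;

     PetscCall(MatMumpsGetInfog(F, 1, &infog1));
     if (infog1 < 0) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MUMPS reported INFOG(1) = %" PetscInt_FMT "\n", infog1));
.ve
*/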
3781: /*@
3782: MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3784: Logically Collective
3786: Input Parameters:
3787: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3788: - icntl - index of MUMPS parameter array RINFO()
3790: Output Parameter:
3791: . val - value of MUMPS RINFO(icntl)
3793: Level: beginner
3795: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3796: @*/
3797: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3798: {
3799: PetscFunctionBegin;
3801: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3802: PetscAssertPointer(val, 3);
3803: PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3804: PetscFunctionReturn(PETSC_SUCCESS);
3805: }
3807: /*@
3808: MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3810: Logically Collective
3812: Input Parameters:
3813: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3814: - icntl - index of MUMPS parameter array RINFOG()
3816: Output Parameter:
3817: . val - value of MUMPS RINFOG(icntl)
3819: Level: beginner
3821: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3822: @*/
3823: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3824: {
3825: PetscFunctionBegin;
3827: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3828: PetscAssertPointer(val, 3);
3829: PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3830: PetscFunctionReturn(PETSC_SUCCESS);
3831: }
3833: /*@
3834: MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3836: Logically Collective
3838: Input Parameter:
3839: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3841: Output Parameters:
3842: + size - local size of the array; it is non-zero only on MPI rank 0
3843: - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3844: for freeing it with `PetscFree()`.
3846: Level: beginner
3848: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3849: @*/
3850: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3851: {
3852: PetscFunctionBegin;
3854: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3855: PetscAssertPointer(size, 2);
3856: PetscAssertPointer(array, 3);
3857: PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3858: PetscFunctionReturn(PETSC_SUCCESS);
3859: }
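/*
   A sketch of retrieving null pivot rows (illustrative; `F` is assumed to be a MUMPS factor from MatGetFactor()): ICNTL(24) must be set
   to 1 before the numeric factorization, for example with MatMumpsSetIcntl() or -mat_mumps_icntl_24 1; afterwards the 0-based null pivot
   rows are available on MPI rank 0 and must be freed by the caller.

.vb
     PetscInt nnull, *rows;

     PetscCall(MatMumpsSetIcntl(F, 24, 1)); // enable null pivot detection before the numeric factorization
     // ... numeric factorization of F ...
     PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows)); // nnull and rows are non-empty only on rank 0
     PetscCall(PetscFree(rows));
.ve
*/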
3861: /*MC
3862: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
3863: MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3865:   Works with `MATAIJ`, `MATSBAIJ`, `MATBAIJ`, `MATSELL`, `MATDENSE`, `MATDIAGONAL`, and `MATNEST` matrices
3867: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3869:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
3870: See details below.
3872:   Use `-pc_type lu` or `-pc_type cholesky` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
3874: Options Database Keys:
3875: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3876: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
3877: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
3878: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
3879: . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
3880: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3881: Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3882: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
3883: . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3884: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3885: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3886: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3887: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3888: . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3889: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3890: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3891: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3892: . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
3893: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3894: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3895: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3896: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3897: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3898: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3899: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3900: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3901: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3902: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3903: . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3904: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3905: . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3906: . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3907: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
3908: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
3909: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3910: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3911: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3912: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3913: - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3914:                                    The default is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
3916: Level: beginner
3918: Notes:
3919:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it will
3920:   generate an error if the matrix is Hermitian.
3922: When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3923: `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
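  For example, a hypothetical application ./app that solves a linear system with a `KSP` using the default options prefix could select
  MUMPS and adjust a couple of the options listed above (the values are only illustrative) with
.vb
    ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_cntl_1 0.01
.ve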
3925: When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3926: the failure with
3927: .vb
3928: KSPGetPC(ksp,&pc);
3929: PCFactorGetMatrix(pc,&mat);
3930: MatMumpsGetInfo(mat,....);
3931: MatMumpsGetInfog(mat,....); etc.
3932: .ve
3933: Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3935: MUMPS provides 64-bit integer support in two build modes:
3936: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3937: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3939: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3940: MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3941: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3942: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3944:   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3946: Two modes to run MUMPS/PETSc with OpenMP
3947: .vb
3948: Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3949:    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3950: .ve
3952: .vb
3953: `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3954: if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3955: .ve
3957: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3958: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3959: (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3960: libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3961:   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3963: If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3964: processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3965: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3966:   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (for example, by
3967:   CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs.
3968:   On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
3969:   If you interleave MPI ranks on the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and you bind
3970:   MPI ranks to cores, then with `-mat_mumps_use_omp_threads 16` a master rank (and the threads it spawns) will use half of its cores on socket 0 and half
3971:   on socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the
3972:   problem does not occur. Therefore, when you use `-mat_mumps_use_omp_threads`, keep an eye on your MPI rank mapping and CPU binding.
3973:   For example, with the Slurm job scheduler, one can use "srun --cpu-bind=verbose -m block:block" to map consecutive MPI ranks to sockets and
3974:   examine the resulting mapping.
3976:   PETSc does not control thread binding in MUMPS. To get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3977:   for example, "export OMP_PLACES=threads" and "export OMP_PROC_BIND=spread". One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3978: calls `omp_set_num_threads`(m) internally before calling MUMPS.
3980: See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3982: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3983: M*/
3985: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3986: {
3987: PetscFunctionBegin;
3988: *type = MATSOLVERMUMPS;
3989: PetscFunctionReturn(PETSC_SUCCESS);
3990: }
3992: /* MatGetFactor for Seq and MPI AIJ matrices */
3993: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3994: {
3995: Mat B;
3996: Mat_MUMPS *mumps;
3997: PetscBool isSeqAIJ, isDiag, isDense;
3998: PetscMPIInt size;
4000: PetscFunctionBegin;
4001: #if defined(PETSC_USE_COMPLEX)
4002: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4003: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4004: *F = NULL;
4005: PetscFunctionReturn(PETSC_SUCCESS);
4006: }
4007: #endif
4008: /* Create the factorization matrix */
4009: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4010: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4011: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4012: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4013: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4014: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4015: PetscCall(MatSetUp(B));
4017: PetscCall(PetscNew(&mumps));
4019: B->ops->view = MatView_MUMPS;
4020: B->ops->getinfo = MatGetInfo_MUMPS;
4022: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4023: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4024: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4025: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4026: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4027: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4028: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4029: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4030: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4031: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4032: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4033: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4034: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4035: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4036: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4038: if (ftype == MAT_FACTOR_LU) {
4039: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4040: B->factortype = MAT_FACTOR_LU;
4041: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4042: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4043: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4044: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4045: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4046: mumps->sym = 0;
4047: } else {
4048: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4049: B->factortype = MAT_FACTOR_CHOLESKY;
4050: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4051: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4052: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4053: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4054: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4055: #if defined(PETSC_USE_COMPLEX)
4056: mumps->sym = 2;
4057: #else
4058: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4059: else mumps->sym = 2;
4060: #endif
4061: }
4063: /* set solvertype */
4064: PetscCall(PetscFree(B->solvertype));
4065: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4066: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4067: if (size == 1) {
4068: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4069: B->canuseordering = PETSC_TRUE;
4070: }
4071: B->ops->destroy = MatDestroy_MUMPS;
4072: B->data = (void *)mumps;
4074: *F = B;
4075: mumps->id.job = JOB_NULL;
4076: mumps->ICNTL_pre = NULL;
4077: mumps->CNTL_pre = NULL;
4078: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4079: PetscFunctionReturn(PETSC_SUCCESS);
4080: }
4082: /* MatGetFactor for Seq and MPI SBAIJ matrices */
4083: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4084: {
4085: Mat B;
4086: Mat_MUMPS *mumps;
4087: PetscBool isSeqSBAIJ;
4088: PetscMPIInt size;
4090: PetscFunctionBegin;
4091: #if defined(PETSC_USE_COMPLEX)
4092: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4093: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4094: *F = NULL;
4095: PetscFunctionReturn(PETSC_SUCCESS);
4096: }
4097: #endif
4098: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4099: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4100: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4101: PetscCall(MatSetUp(B));
4103: PetscCall(PetscNew(&mumps));
4104: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4105: if (isSeqSBAIJ) {
4106: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4107: } else {
4108: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4109: }
4111: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4112: B->ops->view = MatView_MUMPS;
4113: B->ops->getinfo = MatGetInfo_MUMPS;
4115: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4116: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4117: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4118: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4119: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4120: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4121: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4122: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4123: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4124: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4125: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4126: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4127: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4128: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4129: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4131: B->factortype = MAT_FACTOR_CHOLESKY;
4132: #if defined(PETSC_USE_COMPLEX)
4133: mumps->sym = 2;
4134: #else
4135: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4136: else mumps->sym = 2;
4137: #endif
4139: /* set solvertype */
4140: PetscCall(PetscFree(B->solvertype));
4141: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4142: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4143: if (size == 1) {
4144: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4145: B->canuseordering = PETSC_TRUE;
4146: }
4147: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4148: B->ops->destroy = MatDestroy_MUMPS;
4149: B->data = (void *)mumps;
4151: *F = B;
4152: mumps->id.job = JOB_NULL;
4153: mumps->ICNTL_pre = NULL;
4154: mumps->CNTL_pre = NULL;
4155: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4156: PetscFunctionReturn(PETSC_SUCCESS);
4157: }
4159: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4160: {
4161: Mat B;
4162: Mat_MUMPS *mumps;
4163: PetscBool isSeqBAIJ;
4164: PetscMPIInt size;
4166: PetscFunctionBegin;
4167: /* Create the factorization matrix */
4168: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4169: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4170: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4171: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4172: PetscCall(MatSetUp(B));
4174: PetscCall(PetscNew(&mumps));
4175: PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4176: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4177: B->factortype = MAT_FACTOR_LU;
4178: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4179: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4180: mumps->sym = 0;
4181: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4183: B->ops->view = MatView_MUMPS;
4184: B->ops->getinfo = MatGetInfo_MUMPS;
4186: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4187: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4188: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4189: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4190: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4191: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4192: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4193: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4194: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4195: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4196: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4197: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4198: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4199: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4200: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4202: /* set solvertype */
4203: PetscCall(PetscFree(B->solvertype));
4204: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4205: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4206: if (size == 1) {
4207: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4208: B->canuseordering = PETSC_TRUE;
4209: }
4210: B->ops->destroy = MatDestroy_MUMPS;
4211: B->data = (void *)mumps;
4213: *F = B;
4214: mumps->id.job = JOB_NULL;
4215: mumps->ICNTL_pre = NULL;
4216: mumps->CNTL_pre = NULL;
4217: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4218: PetscFunctionReturn(PETSC_SUCCESS);
4219: }
4221: /* MatGetFactor for Seq and MPI SELL matrices */
4222: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4223: {
4224: Mat B;
4225: Mat_MUMPS *mumps;
4226: PetscBool isSeqSELL;
4227: PetscMPIInt size;
4229: PetscFunctionBegin;
4230: /* Create the factorization matrix */
4231: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4232: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4233: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4234: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4235: PetscCall(MatSetUp(B));
4237: PetscCall(PetscNew(&mumps));
4239: B->ops->view = MatView_MUMPS;
4240: B->ops->getinfo = MatGetInfo_MUMPS;
4242: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4243: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4244: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4245: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4246: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4247: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4248: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4249: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4250: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4251: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4252: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4253: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4255: PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4256: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4257: B->factortype = MAT_FACTOR_LU;
4258: PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4259: mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4260: mumps->sym = 0;
4261: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4263: /* set solvertype */
4264: PetscCall(PetscFree(B->solvertype));
4265: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4266: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4267: if (size == 1) {
4268: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4269: B->canuseordering = PETSC_TRUE;
4270: }
4271: B->ops->destroy = MatDestroy_MUMPS;
4272: B->data = (void *)mumps;
4274: *F = B;
4275: mumps->id.job = JOB_NULL;
4276: mumps->ICNTL_pre = NULL;
4277: mumps->CNTL_pre = NULL;
4278: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4279: PetscFunctionReturn(PETSC_SUCCESS);
4280: }
4282: /* MatGetFactor for MATNEST matrices */
4283: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4284: {
4285: Mat B, **mats;
4286: Mat_MUMPS *mumps;
4287: PetscInt nr, nc;
4288: PetscMPIInt size;
4289: PetscBool flg = PETSC_TRUE;
4291: PetscFunctionBegin;
4292: #if defined(PETSC_USE_COMPLEX)
4293: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4294: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4295: *F = NULL;
4296: PetscFunctionReturn(PETSC_SUCCESS);
4297: }
4298: #endif
4300: /* Return if some condition is not satisfied */
4301: *F = NULL;
4302: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4303: if (ftype == MAT_FACTOR_CHOLESKY) {
4304: IS *rows, *cols;
4305: PetscInt *m, *M;
4307: PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4308: PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4309: PetscCall(MatNestGetISs(A, rows, cols));
4310: for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4311: if (!flg) {
4312: PetscCall(PetscFree2(rows, cols));
4313: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4314: PetscFunctionReturn(PETSC_SUCCESS);
4315: }
4316: PetscCall(PetscMalloc2(nr, &m, nr, &M));
4317: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
4318: for (PetscInt r = 0; flg && r < nr; r++)
4319: for (PetscInt k = r + 1; flg && k < nr; k++)
4320: if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4321: PetscCall(PetscFree2(m, M));
4322: PetscCall(PetscFree2(rows, cols));
4323: if (!flg) {
4324: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4325: PetscFunctionReturn(PETSC_SUCCESS);
4326: }
4327: }
4329: for (PetscInt r = 0; r < nr; r++) {
4330: for (PetscInt c = 0; c < nc; c++) {
4331: Mat sub = mats[r][c];
4332: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
4334: if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4335: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4336: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4337: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4338: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4339: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4340: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4341: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4342: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4343: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4344: if (ftype == MAT_FACTOR_CHOLESKY) {
4345: if (r == c) {
4346: if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4347: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4348: flg = PETSC_FALSE;
4349: }
4350: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4351: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4352: flg = PETSC_FALSE;
4353: }
4354: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4355: PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4356: flg = PETSC_FALSE;
4357: }
4358: }
4359: }
4360: if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
4362: /* Create the factorization matrix */
4363: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4364: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4365: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4366: PetscCall(MatSetUp(B));
4368: PetscCall(PetscNew(&mumps));
4370: B->ops->view = MatView_MUMPS;
4371: B->ops->getinfo = MatGetInfo_MUMPS;
4373: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4374: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4375: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4376: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4377: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4378: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4379: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4380: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4381: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4382: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4383: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4384: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4385: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4386: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4387: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4389: if (ftype == MAT_FACTOR_LU) {
4390: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4391: B->factortype = MAT_FACTOR_LU;
4392: mumps->sym = 0;
4393: } else {
4394: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4395: B->factortype = MAT_FACTOR_CHOLESKY;
4396: #if defined(PETSC_USE_COMPLEX)
4397: mumps->sym = 2;
4398: #else
4399: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4400: else mumps->sym = 2;
4401: #endif
4402: }
4403: mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4404: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
4406: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4407: if (size == 1) {
4408: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4409: B->canuseordering = PETSC_TRUE;
4410: }
4412: /* set solvertype */
4413: PetscCall(PetscFree(B->solvertype));
4414: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4415: B->ops->destroy = MatDestroy_MUMPS;
4416: B->data = (void *)mumps;
4418: *F = B;
4419: mumps->id.job = JOB_NULL;
4420: mumps->ICNTL_pre = NULL;
4421: mumps->CNTL_pre = NULL;
4422: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4423: PetscFunctionReturn(PETSC_SUCCESS);
4424: }
4426: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4427: {
4428: PetscFunctionBegin;
4429: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4430: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4431: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4432: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4433: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4434: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4435: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4436: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4437: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4438: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4439: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4440: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4441: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4442: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4443: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4444: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4445: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4446: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4447: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4448: PetscFunctionReturn(PETSC_SUCCESS);
4449: }