Actual source code: mumps.c
1: /*
2: Provides an interface to the MUMPS sparse solver
3: */
4: #include <petscpkg_version.h>
5: #include <petscsf.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
9: #include <petsc/private/vecimpl.h>
11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
13: EXTERN_C_BEGIN
14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15: #include <cmumps_c.h>
16: #include <zmumps_c.h>
17: #include <smumps_c.h>
18: #include <dmumps_c.h>
19: #else
20: #if defined(PETSC_USE_COMPLEX)
21: #if defined(PETSC_USE_REAL_SINGLE)
22: #include <cmumps_c.h>
23: #define MUMPS_c cmumps_c
24: #define MumpsScalar CMUMPS_COMPLEX
25: #else
26: #include <zmumps_c.h>
27: #define MUMPS_c zmumps_c
28: #define MumpsScalar ZMUMPS_COMPLEX
29: #endif
30: #else
31: #if defined(PETSC_USE_REAL_SINGLE)
32: #include <smumps_c.h>
33: #define MUMPS_c smumps_c
34: #define MumpsScalar SMUMPS_REAL
35: #else
36: #include <dmumps_c.h>
37: #define MUMPS_c dmumps_c
38: #define MumpsScalar DMUMPS_REAL
39: #endif
40: #endif
41: #endif
42: #if defined(PETSC_USE_COMPLEX)
43: #if defined(PETSC_USE_REAL_SINGLE)
44: #define MUMPS_STRUC_C CMUMPS_STRUC_C
45: #else
46: #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47: #endif
48: #else
49: #if defined(PETSC_USE_REAL_SINGLE)
50: #define MUMPS_STRUC_C SMUMPS_STRUC_C
51: #else
52: #define MUMPS_STRUC_C DMUMPS_STRUC_C
53: #endif
54: #endif
55: EXTERN_C_END
57: #define JOB_INIT -1
58: #define JOB_NULL 0
59: #define JOB_FACTSYMBOLIC 1
60: #define JOB_FACTNUMERIC 2
61: #define JOB_SOLVE 3
62: #define JOB_END -2
64: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66: naming convention in PetscMPIInt, PetscBLASInt etc.
67: */
68: typedef MUMPS_INT PetscMUMPSInt;
70: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
72: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73: #endif
74: #else
75: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
76: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77: #endif
78: #endif
80: #define MPIU_MUMPSINT MPI_INT
81: #define PETSC_MUMPS_INT_MAX 2147483647
82: #define PETSC_MUMPS_INT_MIN -2147483648
84: /* Cast PetscCount to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
85: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
86: {
87: PetscFunctionBegin;
88: #if PetscDefined(USE_64BIT_INDICES)
89: PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
90: #endif
91: *b = (PetscMUMPSInt)a;
92: PetscFunctionReturn(PETSC_SUCCESS);
93: }
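// Illustrative usage (sketch, not part of the build): downcasting a possibly 64-bit count before
// handing it to MUMPS; with 64-bit PetscInt builds the cast errors out instead of silently
// truncating values outside the PetscMUMPSInt range. The variable names below are made up.
//
//   PetscCount    nnz = ...;                      // e.g. a local number of nonzeros
//   PetscMUMPSInt nnz32;
//   PetscCall(PetscMUMPSIntCast(nnz, &nnz32));    // errors if nnz > PETSC_MUMPS_INT_MAX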
95: /* Put these utility routines here since they are only used in this file */
96: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97: {
98: PetscInt myval;
99: PetscBool myset;
101: PetscFunctionBegin;
102: /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103: PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104: if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105: if (set) *set = myset;
106: PetscFunctionReturn(PETSC_SUCCESS);
107: }
108: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
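// Illustrative usage (sketch, not part of the build): this wrapper is meant to be used inside a
// PetscOptionsBegin()/PetscOptionsEnd() block where PetscOptionsObject is in scope, as done in the
// options processing later in this file. The specific option below is only an example.
//
//   PetscMUMPSInt ival;
//   PetscBool     flg;
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase of the estimated working space", "None", mumps->id.ICNTL(14), &ival, &flg));
//   if (flg) mumps->id.ICNTL(14) = ival;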
110: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111: //
112: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113: // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
114: // arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
115: //
116: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
117: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118: // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
119: typedef struct {
120: PetscPrecision precision; // precision used by MUMPS
121: void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
123: // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124: MUMPS_INT sym, par, job;
125: MUMPS_INT comm_fortran; /* Fortran communicator */
126: MUMPS_INT *icntl;
127: void *cntl; // MumpsReal, fixed size array
128: MUMPS_INT n;
129: MUMPS_INT nblk;
131: /* Assembled entry */
132: MUMPS_INT8 nnz;
133: MUMPS_INT *irn;
134: MUMPS_INT *jcn;
135: void *a; // MumpsScalar, centralized input
136: PetscCount a_len;
138: /* Distributed entry */
139: MUMPS_INT8 nnz_loc;
140: MUMPS_INT *irn_loc;
141: MUMPS_INT *jcn_loc;
142: void *a_loc; // MumpsScalar, distributed input
143: PetscCount a_loc_len;
145: /* Matrix by blocks */
146: MUMPS_INT *blkptr;
147: MUMPS_INT *blkvar;
149: /* Ordering, if given by user */
150: MUMPS_INT *perm_in;
152: /* RHS, solution, output data and statistics */
153: void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers
154: PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
156: MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157: MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158: // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159: MUMPS_INT schur_lld;
160: MUMPS_INT *info, *infog; // fixed size array
161: void *rinfo, *rinfog; // MumpsReal, fixed size array
163: /* Null space */
164: MUMPS_INT *pivnul_list; // allocated by MUMPS!
165: MUMPS_INT *mapping; // allocated by MUMPS!
167: /* Schur */
168: MUMPS_INT size_schur;
169: MUMPS_INT *listvar_schur;
170: void *schur; // MumpsScalar
171: PetscCount schur_len;
173: /* For out-of-core */
174: char *ooc_tmpdir; // fixed size array
175: char *ooc_prefix; // fixed size array
176: } XMUMPS_STRUC_C;
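// Illustrative sketch (not part of the build) of the outer/inner relationship described above,
// assuming a real, double-precision build: fixed-size arrays are aliased, everything else is
// copied by PreMumpsCall()/PostMumpsCall() around each call into MUMPS.
//
//   XMUMPS_STRUC_C  outer;                                        // precision-agnostic view used by PETSc
//   DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer.internal_id;  // concrete struct handed to dmumps_c()
//   // outer.icntl == inner->icntl, outer.rinfog == inner->rinfog, ... (aliased fixed-size arrays)
//   // outer.n, outer.nnz, outer.a, ... are copied into inner right before each dmumps_c() call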
178: // Note: the fixed-size arrays are members of the inner [SDCZ]MUMPS_STRUC_C; point the outer struct's pointers at them
179: #define AllocatInternalID(MUMPS_STRUC_T, outer) \
180: do { \
181: MUMPS_STRUC_T *inner; \
182: PetscCall(PetscNew(&inner)); \
183: outer->icntl = inner->icntl; \
184: outer->cntl = inner->cntl; \
185: outer->info = inner->info; \
186: outer->infog = inner->infog; \
187: outer->rinfo = inner->rinfo; \
188: outer->rinfog = inner->rinfog; \
189: outer->ooc_tmpdir = inner->ooc_tmpdir; \
190: outer->ooc_prefix = inner->ooc_prefix; \
191: /* these three fields should never change after init */ \
192: inner->comm_fortran = outer->comm_fortran; \
193: inner->par = outer->par; \
194: inner->sym = outer->sym; \
195: outer->internal_id = inner; \
196: } while (0)
198: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200: {
201: PetscFunctionBegin;
202: outer->precision = precision;
203: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204: #if defined(PETSC_USE_COMPLEX)
205: if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
206: else AllocatInternalID(ZMUMPS_STRUC_C, outer);
207: #else
208: if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
209: else AllocatInternalID(DMUMPS_STRUC_C, outer);
210: #endif
211: #else
212: AllocatInternalID(MUMPS_STRUC_C, outer);
213: #endif
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
217: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218: do { \
219: MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220: PetscCall(PetscFree(inner->a)); \
221: PetscCall(PetscFree(inner->a_loc)); \
222: PetscCall(PetscFree(inner->redrhs)); \
223: PetscCall(PetscFree(inner->rhs)); \
224: PetscCall(PetscFree(inner->rhs_sparse)); \
225: PetscCall(PetscFree(inner->rhs_loc)); \
226: PetscCall(PetscFree(inner->sol_loc)); \
227: PetscCall(PetscFree(inner->schur)); \
228: } while (0)
230: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231: {
232: PetscFunctionBegin;
233: if (outer->internal_id) { // sometimes, the inner id is never created before we destroy the outer
234: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235: const PetscPrecision mumps_precision = outer->precision;
236: if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237: #if defined(PETSC_USE_COMPLEX)
238: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239: else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240: #else
241: if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242: else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243: #endif
244: }
245: #endif
246: PetscCall(PetscFree(outer->internal_id));
247: }
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
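// Illustrative lifecycle (sketch, not part of the build): the outer id is created once with the
// precision MUMPS should run in and destroyed together with any mixed-precision buffers it owns.
// The communicator/par/sym values below are placeholders.
//
//   XMUMPS_STRUC_C id = {0};
//   id.comm_fortran = ...; id.par = 1; id.sym = 0;                       // must be set before allocation
//   PetscCall(MatMumpsAllocateInternalID(&id, PETSC_PRECISION_DOUBLE));  // links outer and inner structs
//   ...                                                                  // PetscCallMumps_Private(&id) calls
//   PetscCall(MatMumpsFreeInternalID(&id));                              // frees inner struct and buffers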
251: // Make a companion MumpsScalar array (with a given PetscScalar array <pa>), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254: // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255: //
256: // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259: //
260: //
261: // Input parameters:
262: // + convert - whether to do PetscScalar to MumpsScalar conversion
263: // . n - length of the PetscScalar array
264: // . pa - [n], points to the PetscScalar array
265: // . precision - precision of MumpsScalar
266: // . m - on input, length of an existing MumpsScalar array if any, otherwise *m is just zero.
267: // - ma - on input, an existing MumpsScalar array if any.
268: //
269: // Output parameters:
270: // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271: // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272: //
273: // Note:
274: // New memory, if needed, is allocated via PetscMalloc1() and is owned by the caller.
275: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276: {
277: PetscFunctionBegin;
278: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279: const PetscPrecision mumps_precision = precision;
280: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
281: #if defined(PETSC_USE_COMPLEX)
282: if (mumps_precision != PETSC_SCALAR_PRECISION) {
283: if (mumps_precision == PETSC_PRECISION_SINGLE) {
284: if (*m < n) {
285: PetscCall(PetscFree(*ma));
286: PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287: *m = n;
288: }
289: if (convert) {
290: CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291: for (PetscCount i = 0; i < n; i++) {
292: b[i].r = PetscRealPart(pa[i]);
293: b[i].i = PetscImaginaryPart(pa[i]);
294: }
295: }
296: } else {
297: if (*m < n) {
298: PetscCall(PetscFree(*ma));
299: PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300: *m = n;
301: }
302: if (convert) {
303: ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304: for (PetscCount i = 0; i < n; i++) {
305: b[i].r = PetscRealPart(pa[i]);
306: b[i].i = PetscImaginaryPart(pa[i]);
307: }
308: }
309: }
310: }
311: #else
312: if (mumps_precision != PETSC_SCALAR_PRECISION) {
313: if (mumps_precision == PETSC_PRECISION_SINGLE) {
314: if (*m < n) {
315: PetscCall(PetscFree(*ma));
316: PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317: *m = n;
318: }
319: if (convert) {
320: SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322: }
323: } else {
324: if (*m < n) {
325: PetscCall(PetscFree(*ma));
326: PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327: *m = n;
328: }
329: if (convert) {
330: DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332: }
333: }
334: }
335: #endif
336: else
337: #endif
338: {
339: if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340: *ma = (void *)pa; // same precision, make them alias
341: *m = 0;
342: }
343: PetscFunctionReturn(PETSC_SUCCESS);
344: }
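// Illustrative usage (sketch, not part of the build): wrapping a dense PetscScalar right-hand side
// for MUMPS. When MumpsScalar has the same precision as PetscScalar, <ma> simply aliases <pa> and the
// recorded length stays 0; otherwise a converted copy is (re)allocated and its length is recorded so
// that it can be freed later. The buffer names below are hypothetical.
//
//   PetscScalar *b;                 // n right-hand side entries in PETSc's precision
//   PetscCount   b_len   = 0;       // no MumpsScalar buffer exists yet
//   void        *b_mumps = NULL;
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, b, mumps->id.precision, &b_len, &b_mumps));
//   // typically stored as mumps->id.rhs = b_mumps and mumps->id.rhs_len = b_len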
346: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
347: //
348: // 1) If the two types are different, cast array elements.
349: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351: {
352: PetscFunctionBegin;
353: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354: if (mumps_precision != PETSC_SCALAR_PRECISION) {
355: #if defined(PETSC_USE_COMPLEX)
356: if (mumps_precision == PETSC_PRECISION_SINGLE) {
357: PetscReal *a = (PetscReal *)pa;
358: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360: } else {
361: PetscReal *a = (PetscReal *)pa;
362: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363: for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364: }
365: #else
366: if (mumps_precision == PETSC_PRECISION_SINGLE) {
367: const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369: } else {
370: const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371: for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372: }
373: #endif
374: } else
375: #endif
376: PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381: //
382: // 1) If the two types are different, cast array elements.
383: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385: {
386: PetscFunctionBegin;
387: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388: if (mumps_precision != PETSC_SCALAR_PRECISION) {
389: #if defined(PETSC_USE_COMPLEX)
390: if (mumps_precision == PETSC_PRECISION_SINGLE) {
391: CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392: for (PetscCount i = 0; i < n; i++) {
393: b[i].r = PetscRealPart(pa[i]);
394: b[i].i = PetscImaginaryPart(pa[i]);
395: }
396: } else {
397: ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398: for (PetscCount i = 0; i < n; i++) {
399: b[i].r = PetscRealPart(pa[i]);
400: b[i].i = PetscImaginaryPart(pa[i]);
401: }
402: }
403: #else
404: if (mumps_precision == PETSC_PRECISION_SINGLE) {
405: SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407: } else {
408: DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409: for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410: }
411: #endif
412: } else
413: #endif
414: PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
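// Illustrative round trip (sketch, not part of the build) with the two cast helpers above: copy
// PetscScalar data into a MumpsScalar buffer before a solve and copy the result back afterwards,
// regardless of whether the precisions match. x_petsc/x_mumps are hypothetical names.
//
//   PetscCall(MatMumpsCastPetscScalarArray(n, x_petsc, mumps->id.precision, x_mumps)); // PetscScalar -> MumpsScalar
//   ...                                                                                 // MUMPS solve phase
//   PetscCall(MatMumpsCastMumpsScalarArray(n, mumps->id.precision, x_mumps, x_petsc)); // MumpsScalar -> PetscScalar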
418: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419: {
420: return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421: }
423: #define PreMumpsCall(inner, outer, mumpsscalar) \
424: do { \
425: inner->job = outer->job; \
426: inner->n = outer->n; \
427: inner->nblk = outer->nblk; \
428: inner->nnz = outer->nnz; \
429: inner->irn = outer->irn; \
430: inner->jcn = outer->jcn; \
431: inner->a = (mumpsscalar *)outer->a; \
432: inner->nnz_loc = outer->nnz_loc; \
433: inner->irn_loc = outer->irn_loc; \
434: inner->jcn_loc = outer->jcn_loc; \
435: inner->a_loc = (mumpsscalar *)outer->a_loc; \
436: inner->blkptr = outer->blkptr; \
437: inner->blkvar = outer->blkvar; \
438: inner->perm_in = outer->perm_in; \
439: inner->rhs = (mumpsscalar *)outer->rhs; \
440: inner->redrhs = (mumpsscalar *)outer->redrhs; \
441: inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \
442: inner->sol_loc = (mumpsscalar *)outer->sol_loc; \
443: inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \
444: inner->irhs_sparse = outer->irhs_sparse; \
445: inner->irhs_ptr = outer->irhs_ptr; \
446: inner->isol_loc = outer->isol_loc; \
447: inner->irhs_loc = outer->irhs_loc; \
448: inner->nrhs = outer->nrhs; \
449: inner->lrhs = outer->lrhs; \
450: inner->lredrhs = outer->lredrhs; \
451: inner->nz_rhs = outer->nz_rhs; \
452: inner->lsol_loc = outer->lsol_loc; \
453: inner->nloc_rhs = outer->nloc_rhs; \
454: inner->lrhs_loc = outer->lrhs_loc; \
455: inner->schur_lld = outer->schur_lld; \
456: inner->size_schur = outer->size_schur; \
457: inner->listvar_schur = outer->listvar_schur; \
458: inner->schur = (mumpsscalar *)outer->schur; \
459: } while (0)
461: #define PostMumpsCall(inner, outer) \
462: do { \
463: outer->pivnul_list = inner->pivnul_list; \
464: outer->mapping = inner->mapping; \
465: } while (0)
467: // Entry point for PETSc to call MUMPS
468: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469: {
470: PetscFunctionBegin;
471: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472: #if defined(PETSC_USE_COMPLEX)
473: if (outer->precision == PETSC_PRECISION_SINGLE) {
474: CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475: PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476: PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
477: PostMumpsCall(inner, outer);
478: } else {
479: ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480: PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481: PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
482: PostMumpsCall(inner, outer);
483: }
484: #else
485: if (outer->precision == PETSC_PRECISION_SINGLE) {
486: SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487: PreMumpsCall(inner, outer, SMUMPS_REAL);
488: PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
489: PostMumpsCall(inner, outer);
490: } else {
491: DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492: PreMumpsCall(inner, outer, DMUMPS_REAL);
493: PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
494: PostMumpsCall(inner, outer);
495: }
496: #endif
497: #else
498: MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499: PreMumpsCall(inner, outer, MumpsScalar);
500: PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501: PostMumpsCall(inner, outer);
502: #endif
503: PetscFunctionReturn(PETSC_SUCCESS);
504: }
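// Illustrative call flow (sketch, not part of the build): a MUMPS phase is driven by setting the
// job field on the outer id and invoking the dispatcher above, which copies the outer fields into
// the precision-specific inner struct, calls {s,d,c,z}mumps_c(), and copies back the output pointers.
//
//   mumps->id.job = JOB_FACTNUMERIC;                    // or JOB_FACTSYMBOLIC, JOB_SOLVE, ...
//   PetscCall(PetscCallMumps_Private(&mumps->id));      // usually via the PetscMUMPS_c() macro below
//   // then check mumps->id.INFOG(1) for MUMPS errors, as the factorization/solve routines do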
506: /* macros s.t. indices match MUMPS documentation */
507: #define ICNTL(I) icntl[(I) - 1]
508: #define INFOG(I) infog[(I) - 1]
509: #define INFO(I) info[(I) - 1]
511: // Get a value from a MumpsScalar array, which is the field <F> in the struct <ID> of a MUMPS_STRUC_C type. The value is convertible to PetscScalar. Note no minus 1 on I!
512: #if defined(PETSC_USE_COMPLEX)
513: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514: #else
515: #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516: #endif
518: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519: #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521: #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])
523: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal
524: #define ID_CNTL_SET(ID, I, VAL) \
525: do { \
526: if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527: else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528: } while (0)
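// Illustrative usage (sketch, not part of the build) of the precision-agnostic accessors above;
// the specific control/statistic indices are only examples.
//
//   mumps->id.ICNTL(24) = 1;                               // integer controls use plain array access
//   ID_CNTL_SET(mumps->id, 3, (PetscReal)1e-6);            // set a real control in MUMPS's precision
//   PetscReal r = ID_RINFOG_GET(mumps->id, 1);             // read a MumpsReal statistic as PetscReal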
530: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
531: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532: #define PetscMUMPS_c(mumps) \
533: do { \
534: if (mumps->use_petsc_omp_support) { \
535: if (mumps->is_omp_master) { \
536: PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538: PetscCall(PetscCallMumps_Private(&mumps->id)); \
539: PetscCall(PetscFPTrapPop()); \
540: PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541: } \
542: PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543: /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
544: to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
545: an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
546: omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
547: */ \
548: MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549: PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550: PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551: PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552: PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553: } else { \
554: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555: PetscCall(PetscCallMumps_Private(&mumps->id)); \
556: PetscCall(PetscFPTrapPop()); \
557: } \
558: } while (0)
559: #else
560: #define PetscMUMPS_c(mumps) \
561: do { \
562: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563: PetscCall(PetscCallMumps_Private(&mumps->id)); \
564: PetscCall(PetscFPTrapPop()); \
565: } while (0)
566: #endif
568: typedef struct Mat_MUMPS Mat_MUMPS;
569: struct Mat_MUMPS {
570: XMUMPS_STRUC_C id;
572: MatStructure matstruc;
573: PetscMPIInt myid, petsc_size;
574: PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */
575: PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
576: PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
577: PetscMUMPSInt sym;
578: MPI_Comm mumps_comm;
579: PetscMUMPSInt *ICNTL_pre;
580: PetscReal *CNTL_pre;
581: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
582: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
583: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
584: PetscMUMPSInt ICNTL26;
585: PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
586: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587: PetscInt *rhs_nrow, max_nrhs;
588: PetscMPIInt *rhs_recvcounts, *rhs_disps;
589: PetscScalar *rhs_loc, *rhs_recvbuf;
590: #endif
591: Vec b_seq, x_seq;
592: PetscInt ninfo, *info; /* which INFO to display */
593: PetscInt sizeredrhs;
594: PetscScalar *schur_sol;
595: PetscInt schur_sizesol;
596: PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision
597: PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598: PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
599: PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
601: /* Support for MATNEST */
602: PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603: PetscCount *nest_vals_start;
604: PetscScalar *nest_vals;
606: /* stuff used by petsc/mumps OpenMP support */
607: PetscBool use_petsc_omp_support;
608: PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
609: MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610: PetscCount *recvcount; /* a collection of nnz on omp_master */
611: PetscMPIInt tag, omp_comm_size;
612: PetscBool is_omp_master; /* is this rank the master of omp_comm */
613: MPI_Request *reqs;
614: };
616: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
618: */
619: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620: {
621: PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
623: PetscFunctionBegin;
624: #if defined(PETSC_USE_64BIT_INDICES)
625: {
626: PetscInt i;
627: if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
628: PetscCall(PetscFree(mumps->ia_alloc));
629: PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
630: mumps->cur_ilen = nrow + 1;
631: }
632: if (nnz > mumps->cur_jlen) {
633: PetscCall(PetscFree(mumps->ja_alloc));
634: PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
635: mumps->cur_jlen = nnz;
636: }
637: for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
638: for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
639: *ia_mumps = mumps->ia_alloc;
640: *ja_mumps = mumps->ja_alloc;
641: }
642: #else
643: *ia_mumps = ia;
644: *ja_mumps = ja;
645: #endif
646: PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
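// Illustrative usage (sketch, not part of the build): casting the 1-based CSR of a sparse right-hand
// side before passing it to MUMPS. With 64-bit PetscInt the row pointers/column indices are copied
// into the int32 work arrays ia_alloc/ja_alloc; otherwise the input arrays are passed through as is.
//
//   PetscMUMPSInt *ia32, *ja32, nz32;
//   PetscCall(PetscMUMPSIntCSRCast(mumps, nrow, ia, ja, &ia32, &ja32, &nz32));
//   // typically: mumps->id.irhs_ptr = ia32; mumps->id.irhs_sparse = ja32; mumps->id.nz_rhs = nz32;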
650: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651: {
652: PetscFunctionBegin;
653: PetscCall(PetscFree(mumps->id.listvar_schur));
654: PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655: PetscCall(PetscFree(mumps->schur_sol));
656: mumps->id.size_schur = 0;
657: mumps->id.schur_lld = 0;
658: if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is not yet built
659: PetscFunctionReturn(PETSC_SUCCESS);
660: }
662: /* solve with rhs in mumps->id.redrhs and return in the same location */
663: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
664: {
665: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
666: Mat S, B, X; // solve S*X = B; all three matrices are dense
667: MatFactorSchurStatus schurstatus;
668: PetscInt sizesol;
669: const PetscScalar *xarray;
671: PetscFunctionBegin;
672: PetscCall(MatFactorFactorizeSchurComplement(F));
673: PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
674: PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
676: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
677: PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
678: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
679: PetscCall(MatBindToCPU(B, S->boundtocpu));
680: #endif
681: switch (schurstatus) {
682: case MAT_FACTOR_SCHUR_FACTORED:
683: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
684: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
685: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
686: PetscCall(MatBindToCPU(X, S->boundtocpu));
687: #endif
688: if (!mumps->id.ICNTL(9)) { /* transpose solve */
689: PetscCall(MatMatSolveTranspose(S, B, X));
690: } else {
691: PetscCall(MatMatSolve(S, B, X));
692: }
693: break;
694: case MAT_FACTOR_SCHUR_INVERTED:
695: sizesol = mumps->id.nrhs * mumps->id.size_schur;
696: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
697: PetscCall(PetscFree(mumps->schur_sol));
698: PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
699: mumps->schur_sizesol = sizesol;
700: }
701: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
702: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
703: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
704: PetscCall(MatBindToCPU(X, S->boundtocpu));
705: #endif
706: PetscCall(MatProductCreateWithMat(S, B, NULL, X));
707: if (!mumps->id.ICNTL(9)) { /* transpose solve */
708: PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
709: } else {
710: PetscCall(MatProductSetType(X, MATPRODUCT_AB));
711: }
712: PetscCall(MatProductSetFromOptions(X));
713: PetscCall(MatProductSymbolic(X));
714: PetscCall(MatProductNumeric(X));
716: PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
717: break;
718: default:
719: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
720: }
721: // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
722: PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
723: PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
724: PetscCall(MatDenseRestoreArrayRead(X, &xarray));
725: PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
726: PetscCall(MatDestroy(&B));
727: PetscCall(MatDestroy(&X));
728: PetscFunctionReturn(PETSC_SUCCESS);
729: }
731: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
732: {
733: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
735: PetscFunctionBegin;
736: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
737: PetscFunctionReturn(PETSC_SUCCESS);
738: }
739: if (!expansion) { /* prepare for the condensation step */
740: PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
741: /* allocate MUMPS internal array to store reduced right-hand sides */
742: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
743: mumps->id.lredrhs = mumps->id.size_schur;
744: mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
745: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
746: PetscCall(PetscFree(mumps->redrhs));
747: PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
748: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
749: }
750: } else { /* prepare for the expansion step */
751: PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
752: mumps->id.ICNTL(26) = 2; /* expansion phase */
753: PetscMUMPS_c(mumps);
754: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
755: /* restore defaults */
756: mumps->id.ICNTL(26) = -1;
757: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
758: if (mumps->id.nrhs > 1) {
759: if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
760: PetscCall(PetscFree(mumps->redrhs));
761: mumps->id.redrhs_len = 0;
762: mumps->id.lredrhs = 0;
763: mumps->sizeredrhs = 0;
764: }
765: }
766: PetscFunctionReturn(PETSC_SUCCESS);
767: }
769: /*
770: MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
772: input:
773: A - matrix in aij,baij or sbaij format
774: shift - 0: C style output triple; 1: Fortran style output triple.
775: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776: MAT_REUSE_MATRIX: only the values in v array are updated
777: output:
778: nnz - dim of r, c, and v (number of local nonzero entries of A)
779: r, c, v - row and col index, matrix values (matrix triples)
781: The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
782: freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
783: that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
785: */
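/*
   Illustrative example (not part of the build): with Fortran-style shift = 1, the 2x2 AIJ matrix

      [ 4  1 ]
      [ 0  3 ]

   is converted to the COO triples passed to MUMPS as

      r = {1, 1, 2},  c = {1, 2, 2},  v = {4.0, 1.0, 3.0},  nnz = 3
*/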
787: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788: {
789: const PetscScalar *av;
790: const PetscInt *ai, *aj, *ajj, M = A->rmap->n;
791: PetscCount nz, rnz, k;
792: PetscMUMPSInt *row, *col;
793: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
795: PetscFunctionBegin;
796: PetscCall(MatSeqAIJGetArrayRead(A, &av));
797: if (reuse == MAT_INITIAL_MATRIX) {
798: nz = aa->nz;
799: ai = aa->i;
800: aj = aa->j;
801: PetscCall(PetscMalloc2(nz, &row, nz, &col));
802: for (PetscCount i = k = 0; i < M; i++) {
803: rnz = ai[i + 1] - ai[i];
804: ajj = aj + ai[i];
805: for (PetscCount j = 0; j < rnz; j++) {
806: PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808: k++;
809: }
810: }
811: mumps->val = (PetscScalar *)av;
812: mumps->irn = row;
813: mumps->jcn = col;
814: mumps->nnz = nz;
815: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818: PetscFunctionReturn(PETSC_SUCCESS);
819: }
821: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
822: {
823: PetscCount nz, i, j, k, r;
824: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
825: PetscMUMPSInt *row, *col;
827: PetscFunctionBegin;
828: nz = a->sliidx[a->totalslices];
829: if (reuse == MAT_INITIAL_MATRIX) {
830: PetscCall(PetscMalloc2(nz, &row, nz, &col));
831: for (i = k = 0; i < a->totalslices; i++) {
832: for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
833: }
834: for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
835: mumps->irn = row;
836: mumps->jcn = col;
837: mumps->nnz = nz;
838: mumps->val = a->val;
839: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
840: else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
841: PetscFunctionReturn(PETSC_SUCCESS);
842: }
844: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845: {
846: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
847: const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848: PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849: PetscInt bs;
850: PetscMUMPSInt *row, *col;
852: PetscFunctionBegin;
853: if (reuse == MAT_INITIAL_MATRIX) {
854: PetscCall(MatGetBlockSize(A, &bs));
855: M = A->rmap->N / bs;
856: ai = aa->i;
857: aj = aa->j;
858: PetscCall(PetscMalloc2(nz, &row, nz, &col));
859: for (i = 0; i < M; i++) {
860: ajj = aj + ai[i];
861: rnz = ai[i + 1] - ai[i];
862: for (k = 0; k < rnz; k++) {
863: for (j = 0; j < bs; j++) {
864: for (m = 0; m < bs; m++) {
865: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866: PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867: idx++;
868: }
869: }
870: }
871: }
872: mumps->irn = row;
873: mumps->jcn = col;
874: mumps->nnz = nz;
875: mumps->val = aa->a;
876: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878: PetscFunctionReturn(PETSC_SUCCESS);
879: }
881: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882: {
883: const PetscInt *ai, *aj, *ajj;
884: PetscInt bs;
885: PetscCount nz, rnz, i, j, k, m;
886: PetscMUMPSInt *row, *col;
887: PetscScalar *val;
888: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
889: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
890: #if defined(PETSC_USE_COMPLEX)
891: PetscBool isset, hermitian;
892: #endif
894: PetscFunctionBegin;
895: #if defined(PETSC_USE_COMPLEX)
896: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898: #endif
899: ai = aa->i;
900: aj = aa->j;
901: PetscCall(MatGetBlockSize(A, &bs));
902: if (reuse == MAT_INITIAL_MATRIX) {
903: const PetscCount alloc_size = aa->nz * bs2;
905: PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906: if (bs > 1) {
907: PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908: mumps->val = mumps->val_alloc;
909: } else {
910: mumps->val = aa->a;
911: }
912: mumps->irn = row;
913: mumps->jcn = col;
914: } else {
915: row = mumps->irn;
916: col = mumps->jcn;
917: }
918: val = mumps->val;
920: nz = 0;
921: if (bs > 1) {
922: for (i = 0; i < mbs; i++) {
923: rnz = ai[i + 1] - ai[i];
924: ajj = aj + ai[i];
925: for (j = 0; j < rnz; j++) {
926: for (k = 0; k < bs; k++) {
927: for (m = 0; m < bs; m++) {
928: if (ajj[j] > i || k >= m) {
929: if (reuse == MAT_INITIAL_MATRIX) {
930: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931: PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932: }
933: val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934: }
935: }
936: }
937: }
938: }
939: } else if (reuse == MAT_INITIAL_MATRIX) {
940: for (i = 0; i < mbs; i++) {
941: rnz = ai[i + 1] - ai[i];
942: ajj = aj + ai[i];
943: for (j = 0; j < rnz; j++) {
944: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946: nz++;
947: }
948: }
949: PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950: } else if (mumps->nest_vals)
951: PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952: else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954: PetscFunctionReturn(PETSC_SUCCESS);
955: }
957: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958: {
959: const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960: PetscCount nz, rnz, i, j;
961: const PetscScalar *av, *v1;
962: PetscScalar *val;
963: PetscMUMPSInt *row, *col;
964: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
965: PetscBool diagDense;
966: #if defined(PETSC_USE_COMPLEX)
967: PetscBool hermitian, isset;
968: #endif
970: PetscFunctionBegin;
971: #if defined(PETSC_USE_COMPLEX)
972: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
974: #endif
975: PetscCall(MatSeqAIJGetArrayRead(A, &av));
976: ai = aa->i;
977: aj = aa->j;
978: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979: if (reuse == MAT_INITIAL_MATRIX) {
980: /* count nz in the upper triangular part of A */
981: nz = 0;
982: if (!diagDense) {
983: for (i = 0; i < M; i++) {
984: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985: for (j = ai[i]; j < ai[i + 1]; j++) {
986: if (aj[j] < i) continue;
987: nz++;
988: }
989: } else {
990: nz += ai[i + 1] - adiag[i];
991: }
992: }
993: } else {
994: for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995: }
996: PetscCall(PetscMalloc2(nz, &row, nz, &col));
997: PetscCall(PetscMalloc1(nz, &val));
998: mumps->nnz = nz;
999: mumps->irn = row;
1000: mumps->jcn = col;
1001: mumps->val = mumps->val_alloc = val;
1003: nz = 0;
1004: if (!diagDense) {
1005: for (i = 0; i < M; i++) {
1006: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007: for (j = ai[i]; j < ai[i + 1]; j++) {
1008: if (aj[j] < i) continue;
1009: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010: PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011: val[nz] = av[j];
1012: nz++;
1013: }
1014: } else {
1015: rnz = ai[i + 1] - adiag[i];
1016: ajj = aj + adiag[i];
1017: v1 = av + adiag[i];
1018: for (j = 0; j < rnz; j++) {
1019: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021: val[nz++] = v1[j];
1022: }
1023: }
1024: }
1025: } else {
1026: for (i = 0; i < M; i++) {
1027: rnz = ai[i + 1] - adiag[i];
1028: ajj = aj + adiag[i];
1029: v1 = av + adiag[i];
1030: for (j = 0; j < rnz; j++) {
1031: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033: val[nz++] = v1[j];
1034: }
1035: }
1036: }
1037: } else {
1038: nz = 0;
1039: val = mumps->val;
1040: if (!diagDense) {
1041: for (i = 0; i < M; i++) {
1042: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043: for (j = ai[i]; j < ai[i + 1]; j++) {
1044: if (aj[j] < i) continue;
1045: val[nz++] = av[j];
1046: }
1047: } else {
1048: rnz = ai[i + 1] - adiag[i];
1049: v1 = av + adiag[i];
1050: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051: }
1052: }
1053: } else {
1054: for (i = 0; i < M; i++) {
1055: rnz = ai[i + 1] - adiag[i];
1056: v1 = av + adiag[i];
1057: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058: }
1059: }
1060: }
1061: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062: PetscFunctionReturn(PETSC_SUCCESS);
1063: }
1065: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066: {
1067: const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068: PetscInt bs;
1069: PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070: PetscMUMPSInt *row, *col;
1071: const PetscScalar *av, *bv, *v1, *v2;
1072: PetscScalar *val;
1073: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
1074: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data;
1075: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1076: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
1077: #if defined(PETSC_USE_COMPLEX)
1078: PetscBool hermitian, isset;
1079: #endif
1081: PetscFunctionBegin;
1082: #if defined(PETSC_USE_COMPLEX)
1083: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1085: #endif
1086: PetscCall(MatGetBlockSize(A, &bs));
1087: rstart = A->rmap->rstart;
1088: ai = aa->i;
1089: aj = aa->j;
1090: bi = bb->i;
1091: bj = bb->j;
1092: av = aa->a;
1093: bv = bb->a;
1095: garray = mat->garray;
1097: if (reuse == MAT_INITIAL_MATRIX) {
1098: nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100: PetscCall(PetscMalloc1(nz, &val));
1101: /* cannot determine the exact mumps->nnz yet because of the SBAIJ upper-triangular storage */
1102: mumps->irn = row;
1103: mumps->jcn = col;
1104: mumps->val = mumps->val_alloc = val;
1105: } else {
1106: val = mumps->val;
1107: }
1109: jj = 0;
1110: irow = rstart;
1111: for (i = 0; i < mbs; i++) {
1112: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1113: countA = ai[i + 1] - ai[i];
1114: countB = bi[i + 1] - bi[i];
1115: bjj = bj + bi[i];
1116: v1 = av + ai[i] * bs2;
1117: v2 = bv + bi[i] * bs2;
1119: if (bs > 1) {
1120: /* A-part */
1121: for (j = 0; j < countA; j++) {
1122: for (k = 0; k < bs; k++) {
1123: for (m = 0; m < bs; m++) {
1124: if (rstart + ajj[j] * bs > irow || k >= m) {
1125: if (reuse == MAT_INITIAL_MATRIX) {
1126: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128: }
1129: val[jj++] = v1[j * bs2 + m + k * bs];
1130: }
1131: }
1132: }
1133: }
1135: /* B-part */
1136: for (j = 0; j < countB; j++) {
1137: for (k = 0; k < bs; k++) {
1138: for (m = 0; m < bs; m++) {
1139: if (reuse == MAT_INITIAL_MATRIX) {
1140: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142: }
1143: val[jj++] = v2[j * bs2 + m + k * bs];
1144: }
1145: }
1146: }
1147: } else {
1148: /* A-part */
1149: for (j = 0; j < countA; j++) {
1150: if (reuse == MAT_INITIAL_MATRIX) {
1151: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153: }
1154: val[jj++] = v1[j];
1155: }
1157: /* B-part */
1158: for (j = 0; j < countB; j++) {
1159: if (reuse == MAT_INITIAL_MATRIX) {
1160: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162: }
1163: val[jj++] = v2[j];
1164: }
1165: }
1166: irow += bs;
1167: }
1168: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169: PetscFunctionReturn(PETSC_SUCCESS);
1170: }
1172: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173: {
1174: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175: PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176: PetscMUMPSInt *row, *col;
1177: const PetscScalar *av, *bv, *v1, *v2;
1178: PetscScalar *val;
1179: Mat Ad, Ao;
1180: Mat_SeqAIJ *aa;
1181: Mat_SeqAIJ *bb;
1183: PetscFunctionBegin;
1184: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1188: aa = (Mat_SeqAIJ *)Ad->data;
1189: bb = (Mat_SeqAIJ *)Ao->data;
1190: ai = aa->i;
1191: aj = aa->j;
1192: bi = bb->i;
1193: bj = bb->j;
1195: rstart = A->rmap->rstart;
1196: cstart = A->cmap->rstart;
1198: if (reuse == MAT_INITIAL_MATRIX) {
1199: nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201: PetscCall(PetscMalloc1(nz, &val));
1202: mumps->nnz = nz;
1203: mumps->irn = row;
1204: mumps->jcn = col;
1205: mumps->val = mumps->val_alloc = val;
1206: } else {
1207: val = mumps->val;
1208: }
1210: jj = 0;
1211: irow = rstart;
1212: for (i = 0; i < m; i++) {
1213: ajj = aj + ai[i]; /* ptr to the beginning of this row */
1214: countA = ai[i + 1] - ai[i];
1215: countB = bi[i + 1] - bi[i];
1216: bjj = bj + bi[i];
1217: v1 = av + ai[i];
1218: v2 = bv + bi[i];
1220: /* A-part */
1221: for (j = 0; j < countA; j++) {
1222: if (reuse == MAT_INITIAL_MATRIX) {
1223: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224: PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225: }
1226: val[jj++] = v1[j];
1227: }
1229: /* B-part */
1230: for (j = 0; j < countB; j++) {
1231: if (reuse == MAT_INITIAL_MATRIX) {
1232: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234: }
1235: val[jj++] = v2[j];
1236: }
1237: irow++;
1238: }
1239: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241: PetscFunctionReturn(PETSC_SUCCESS);
1242: }
1244: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1245: {
1246: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
1247: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data;
1248: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data;
1249: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1250: const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1251: const PetscInt bs2 = mat->bs2;
1252: PetscInt bs;
1253: PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx;
1254: PetscMUMPSInt *row, *col;
1255: const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1256: PetscScalar *val;
1258: PetscFunctionBegin;
1259: PetscCall(MatGetBlockSize(A, &bs));
1260: if (reuse == MAT_INITIAL_MATRIX) {
1261: nz = bs2 * (aa->nz + bb->nz);
1262: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1263: PetscCall(PetscMalloc1(nz, &val));
1264: mumps->nnz = nz;
1265: mumps->irn = row;
1266: mumps->jcn = col;
1267: mumps->val = mumps->val_alloc = val;
1268: } else {
1269: val = mumps->val;
1270: }
1272: jj = 0;
1273: irow = rstart;
1274: for (i = 0; i < mbs; i++) {
1275: countA = ai[i + 1] - ai[i];
1276: countB = bi[i + 1] - bi[i];
1277: ajj = aj + ai[i];
1278: bjj = bj + bi[i];
1279: v1 = av + bs2 * ai[i];
1280: v2 = bv + bs2 * bi[i];
1282: idx = 0;
1283: /* A-part */
1284: for (k = 0; k < countA; k++) {
1285: for (j = 0; j < bs; j++) {
1286: for (n = 0; n < bs; n++) {
1287: if (reuse == MAT_INITIAL_MATRIX) {
1288: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1289: PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1290: }
1291: val[jj++] = v1[idx++];
1292: }
1293: }
1294: }
1296: idx = 0;
1297: /* B-part */
1298: for (k = 0; k < countB; k++) {
1299: for (j = 0; j < bs; j++) {
1300: for (n = 0; n < bs; n++) {
1301: if (reuse == MAT_INITIAL_MATRIX) {
1302: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1303: PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1304: }
1305: val[jj++] = v2[idx++];
1306: }
1307: }
1308: }
1309: irow += bs;
1310: }
1311: PetscFunctionReturn(PETSC_SUCCESS);
1312: }
1314: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315: {
1316: const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317: PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318: PetscMUMPSInt *row, *col;
1319: const PetscScalar *av, *bv, *v1, *v2;
1320: PetscScalar *val;
1321: Mat Ad, Ao;
1322: Mat_SeqAIJ *aa;
1323: Mat_SeqAIJ *bb;
1324: #if defined(PETSC_USE_COMPLEX)
1325: PetscBool hermitian, isset;
1326: #endif
1328: PetscFunctionBegin;
1329: #if defined(PETSC_USE_COMPLEX)
1330: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331: PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1332: #endif
1333: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1337: aa = (Mat_SeqAIJ *)Ad->data;
1338: bb = (Mat_SeqAIJ *)Ao->data;
1339: ai = aa->i;
1340: aj = aa->j;
1341: bi = bb->i;
1342: bj = bb->j;
1343: PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344: rstart = A->rmap->rstart;
1346: if (reuse == MAT_INITIAL_MATRIX) {
1347: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1348: nzb = 0; /* num of upper triangular entries in mat->B */
1349: for (i = 0; i < m; i++) {
1350: nza += (ai[i + 1] - adiag[i]);
1351: countB = bi[i + 1] - bi[i];
1352: bjj = bj + bi[i];
1353: for (j = 0; j < countB; j++) {
1354: if (garray[bjj[j]] > rstart) nzb++;
1355: }
1356: }
1358: nz = nza + nzb; /* total nz of upper triangular part of mat */
1359: PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360: PetscCall(PetscMalloc1(nz, &val));
1361: mumps->nnz = nz;
1362: mumps->irn = row;
1363: mumps->jcn = col;
1364: mumps->val = mumps->val_alloc = val;
1365: } else {
1366: val = mumps->val;
1367: }
1369: jj = 0;
1370: irow = rstart;
1371: for (i = 0; i < m; i++) {
1372: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373: v1 = av + adiag[i];
1374: countA = ai[i + 1] - adiag[i];
1375: countB = bi[i + 1] - bi[i];
1376: bjj = bj + bi[i];
1377: v2 = bv + bi[i];
1379: /* A-part */
1380: for (j = 0; j < countA; j++) {
1381: if (reuse == MAT_INITIAL_MATRIX) {
1382: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384: }
1385: val[jj++] = v1[j];
1386: }
1388: /* B-part */
1389: for (j = 0; j < countB; j++) {
1390: if (garray[bjj[j]] > rstart) {
1391: if (reuse == MAT_INITIAL_MATRIX) {
1392: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394: }
1395: val[jj++] = v2[j];
1396: }
1397: }
1398: irow++;
1399: }
1400: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402: PetscFunctionReturn(PETSC_SUCCESS);
1403: }
1405: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406: {
1407: const PetscScalar *av;
1408: const PetscInt M = A->rmap->n;
1409: PetscCount i;
1410: PetscMUMPSInt *row, *col;
1411: Vec v;
1413: PetscFunctionBegin;
1414: PetscCall(MatDiagonalGetDiagonal(A, &v));
1415: PetscCall(VecGetArrayRead(v, &av));
1416: if (reuse == MAT_INITIAL_MATRIX) {
1417: PetscCall(PetscMalloc2(M, &row, M, &col));
1418: for (i = 0; i < M; i++) {
1419: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420: col[i] = row[i];
1421: }
1422: mumps->val = (PetscScalar *)av;
1423: mumps->irn = row;
1424: mumps->jcn = col;
1425: mumps->nnz = M;
1426: } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427: else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428: PetscCall(VecRestoreArrayRead(v, &av));
1429: PetscFunctionReturn(PETSC_SUCCESS);
1430: }
1432: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1433: {
1434: PetscScalar *v;
1435: const PetscInt m = A->rmap->n, N = A->cmap->N;
1436: PetscInt lda;
1437: PetscCount i, j;
1438: PetscMUMPSInt *row, *col;
1440: PetscFunctionBegin;
1441: PetscCall(MatDenseGetArray(A, &v));
1442: PetscCall(MatDenseGetLDA(A, &lda));
1443: if (reuse == MAT_INITIAL_MATRIX) {
1444: PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1445: for (i = 0; i < m; i++) {
1446: col[i] = 0;
1447: PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1448: }
1449: for (j = 1; j < N; j++) {
1450: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1451: PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1452: }
1453: if (lda == m) mumps->val = v;
1454: else {
1455: PetscCall(PetscMalloc1(m * N, &mumps->val));
1456: mumps->val_alloc = mumps->val;
1457: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1458: }
1459: mumps->irn = row;
1460: mumps->jcn = col;
1461: mumps->nnz = m * N;
1462: } else {
1463: if (lda == m && !mumps->nest_vals) mumps->val = v;
1464: else {
1465: for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1466: }
1467: }
1468: PetscCall(MatDenseRestoreArray(A, &v));
1469: PetscFunctionReturn(PETSC_SUCCESS);
1470: }
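/* An illustrative layout sketch, not part of the MUMPS interface. For a local dense block with m = 2 rows
   (global rows rstart, rstart+1), N = 3 columns, and leading dimension lda, the routine above emits the
   m*N = 6 triples in column-major order, i.e. entry (i,j) lands at position k = i + m*j:

     k   : 0         1          2         3          4          5
     irn : rstart    rstart+1   rstart    rstart+1   rstart     rstart+1
     jcn : 0         0          1         1          2          2
     val : v[0]      v[1]       v[lda]    v[lda+1]   v[2*lda]   v[2*lda+1]

   When lda == m the dense values array is used in place; otherwise it is packed column by column into mumps->val. */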
1472: // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function repeatedly retrieves the parent Mat until the parent is itself no longer a
1473: // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, and returns the accumulated shift and scaling, whether the parent Mat should be conjugated,
1474: // and whether its rows and columns should be swapped
1475: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1476: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477: {
1478: Mat A;
1479: PetscScalar s[2];
1480: PetscBool isTrans, isHTrans, compare;
1482: PetscFunctionBegin;
1483: do {
1484: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485: if (isTrans) {
1486: PetscCall(MatTransposeGetMat(*sub, &A));
1487: isHTrans = PETSC_FALSE;
1488: } else {
1489: PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490: if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491: }
1492: compare = (PetscBool)(isTrans || isHTrans);
1493: if (compare) {
1494: if (vshift && vscale) {
1495: PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496: if (!*conjugate) {
1497: *vshift += s[0] * *vscale;
1498: *vscale *= s[1];
1499: } else {
1500: *vshift += PetscConj(s[0]) * *vscale;
1501: *vscale *= PetscConj(s[1]);
1502: }
1503: }
1504: if (swap) *swap = (PetscBool)!*swap;
1505: if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506: *sub = A;
1507: }
1508: } while (compare);
1509: PetscFunctionReturn(PETSC_SUCCESS);
1510: }
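/* An illustrative worked example of the accumulation above, not part of the MUMPS interface. Starting from
   conjugate = PETSC_FALSE, vshift = 0, vscale = 1, and peeling the two-layer chain

     sub = a1 * (a0 * P^H + b0 * I)^T + b1 * I        (each layer is a shell with its own scale a_k and shift b_k)

   proceeds outermost-first: after the transpose layer, vshift = b1, vscale = a1, swap = PETSC_TRUE; after the
   Hermitian-transpose layer, vshift = b1 + a1*b0, vscale = a1*a0, swap = PETSC_FALSE, conjugate = PETSC_TRUE,
   and *sub points at the parent P, consistent with sub = a1*a0*conj(P) + (b1 + a1*b0)*I. Because `conjugate` is
   toggled only after a layer's shift/scale has been folded in, conjugation of s[0], s[1] applies to the layers
   processed afterwards (the more deeply nested ones), as the algebra requires. */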
1512: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1513: {
1514: Mat **mats;
1515: PetscInt nr, nc;
1516: PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1518: PetscFunctionBegin;
1519: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1520: if (reuse == MAT_INITIAL_MATRIX) {
1521: PetscMUMPSInt *irns, *jcns;
1522: PetscScalar *vals;
1523: PetscCount totnnz, cumnnz, maxnnz;
1524: PetscInt *pjcns_w, Mbs = 0;
1525: IS *rows, *cols;
1526: PetscInt **rows_idx, **cols_idx;
1528: cumnnz = 0;
1529: maxnnz = 0;
1530: PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1531: for (PetscInt r = 0; r < nr; r++) {
1532: for (PetscInt c = 0; c < nc; c++) {
1533: Mat sub = mats[r][c];
1535: mumps->nest_convert_to_triples[r * nc + c] = NULL;
1536: if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1537: if (sub) {
1538: PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1539: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1540: MatInfo info;
1542: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1543: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1544: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1545: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1546: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1547: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1548: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1549: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1550: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1552: if (chol) {
1553: if (r == c) {
1554: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1555: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1556: else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1557: else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1558: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560: } else {
1561: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1562: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1563: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1564: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1565: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1566: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1567: }
1568: } else {
1569: if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1570: else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1571: else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1572: else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1573: else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1574: else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1575: }
1576: PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1577: mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1578: PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1579: cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1580: maxnnz = PetscMax(maxnnz, info.nz_used);
1581: }
1582: }
1583: }
1585: /* Allocate total COO */
1586: totnnz = cumnnz;
1587: PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1588: PetscCall(PetscMalloc1(totnnz, &vals));
1590: /* Handle row and column maps
1591: We map rows directly and use an SF for the columns */
1592: PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1593: PetscCall(MatNestGetISs(A, rows, cols));
1594: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1595: for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1596: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1597: else (void)maxnnz;
1599: cumnnz = 0;
1600: for (PetscInt r = 0; r < nr; r++) {
1601: for (PetscInt c = 0; c < nc; c++) {
1602: Mat sub = mats[r][c];
1603: const PetscInt *ridx = rows_idx[r];
1604: const PetscInt *cidx = cols_idx[c];
1605: PetscScalar vscale = 1.0, vshift = 0.0;
1606: PetscInt rst, size, bs;
1607: PetscSF csf;
1608: PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1609: PetscLayout cmap;
1610: PetscInt innz;
1612: mumps->nest_vals_start[r * nc + c] = cumnnz;
1613: if (c == r) {
1614: PetscCall(ISGetSize(rows[r], &size));
1615: if (!mumps->nest_convert_to_triples[r * nc + c]) {
1616: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1617: }
1618: PetscCall(MatGetBlockSize(sub, &bs));
1619: Mbs += size / bs;
1620: }
1621: if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1623: /* Extract inner blocks if needed */
1624: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1625: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1627: /* Get column layout to map off-process columns */
1628: PetscCall(MatGetLayouts(sub, NULL, &cmap));
1630: /* Get row start to map on-process rows */
1631: PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1633: /* Directly use the MUMPS data structure and 0-based (C) ordering for now */
1634: PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1636: /* Swap the roles of the row and column indices for transposed blocks,
1637: since we need the values in the final global ordering */
1638: if (swap) {
1639: cidx = rows_idx[r];
1640: ridx = cols_idx[c];
1641: }
1643: /* Communicate column indices
1644: This could have been done with a single SF, but that would have complicated the code considerably.
1645: Since this is done only once, we accept the cost of setting up an SF for each block */
1646: if (PetscDefined(USE_64BIT_INDICES)) {
1647: for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1648: } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings in 64-bit integer builds */
1649: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1650: PetscCall(PetscIntCast(mumps->nnz, &innz));
1651: PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1652: PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653: PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1654: PetscCall(PetscSFDestroy(&csf));
1656: /* Import indices: use direct map for rows and mapped indices for columns */
1657: if (swap) {
1658: for (PetscInt k = 0; k < mumps->nnz; k++) {
1659: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1660: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1661: }
1662: } else {
1663: for (PetscInt k = 0; k < mumps->nnz; k++) {
1664: PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1665: PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1666: }
1667: }
1669: /* Import values to full COO */
1670: if (conjugate) { /* conjugate the entries */
1671: PetscScalar *v = vals + cumnnz;
1672: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1673: } else if (vscale != 1.0) {
1674: PetscScalar *v = vals + cumnnz;
1675: for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1676: } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1678: /* Shift new starting point and sanity check */
1679: cumnnz += mumps->nnz;
1680: PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1682: /* Free scratch memory */
1683: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1684: PetscCall(PetscFree(mumps->val_alloc));
1685: mumps->val = NULL;
1686: mumps->nnz = 0;
1687: }
1688: }
1689: if (mumps->id.ICNTL(15) == 1) {
1690: if (Mbs != A->rmap->N) {
1691: PetscMPIInt rank, size;
1693: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1694: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1695: if (rank == 0) {
1696: PetscInt shift = 0;
1698: PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1699: PetscCall(PetscFree(mumps->id.blkptr));
1700: PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1701: mumps->id.blkptr[0] = 1;
1702: for (PetscInt i = 0; i < size; ++i) {
1703: for (PetscInt r = 0; r < nr; r++) {
1704: Mat sub = mats[r][r];
1705: const PetscInt *ranges;
1706: PetscInt bs;
1708: for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1709: PetscCall(MatGetOwnershipRanges(sub, &ranges));
1710: PetscCall(MatGetBlockSize(sub, &bs));
1711: for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1712: shift += (ranges[i + 1] - ranges[i]) / bs;
1713: }
1714: }
1715: }
1716: } else mumps->id.ICNTL(15) = 0;
1717: }
1718: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1719: for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1720: for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1721: PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1722: if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1723: mumps->nest_vals_start[nr * nc] = cumnnz;
1725: /* Set pointers for final MUMPS data structure */
1726: mumps->nest_vals = vals;
1727: mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1728: mumps->val = vals;
1729: mumps->irn = irns;
1730: mumps->jcn = jcns;
1731: mumps->nnz = cumnnz;
1732: } else {
1733: PetscScalar *oval = mumps->nest_vals;
1734: for (PetscInt r = 0; r < nr; r++) {
1735: for (PetscInt c = 0; c < nc; c++) {
1736: PetscBool conjugate = PETSC_FALSE;
1737: Mat sub = mats[r][c];
1738: PetscScalar vscale = 1.0, vshift = 0.0;
1739: PetscInt midx = r * nc + c;
1741: if (!mumps->nest_convert_to_triples[midx]) continue;
1742: PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1743: PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1744: mumps->val = oval + mumps->nest_vals_start[midx];
1745: PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1746: if (conjugate) {
1747: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1748: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1749: } else if (vscale != 1.0) {
1750: PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1751: for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1752: }
1753: }
1754: }
1755: mumps->val = oval;
1756: }
1757: PetscFunctionReturn(PETSC_SUCCESS);
1758: }
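/* An illustrative usage sketch, not part of the MUMPS interface (error checking omitted; A, b, x are assumed to
   be defined elsewhere, with A a MATNEST whose blocks are of the supported types listed above). The conversion
   routine above is what allows such a MATNEST to be handed to MUMPS without an explicit MatConvert():

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                           // or PCCHOLESKY when the nest is symmetric
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/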
1760: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1761: {
1762: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1764: PetscFunctionBegin;
1765: PetscCall(PetscFree(mumps->id.isol_loc));
1766: PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1767: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1768: PetscCall(VecDestroy(&mumps->b_seq));
1769: PetscCall(VecDestroy(&mumps->x_seq));
1770: PetscCall(PetscFree(mumps->id.perm_in));
1771: PetscCall(PetscFree(mumps->id.blkvar));
1772: PetscCall(PetscFree(mumps->id.blkptr));
1773: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1774: PetscCall(PetscFree(mumps->val_alloc));
1775: PetscCall(PetscFree(mumps->info));
1776: PetscCall(PetscFree(mumps->ICNTL_pre));
1777: PetscCall(PetscFree(mumps->CNTL_pre));
1778: PetscCall(MatMumpsResetSchur_Private(mumps));
1779: if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1780: mumps->id.job = JOB_END;
1781: PetscMUMPS_c(mumps);
1782: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1783: if (mumps->mumps_comm != MPI_COMM_NULL) {
1784: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1785: else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1786: }
1787: }
1788: PetscCall(MatMumpsFreeInternalID(&mumps->id));
1789: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1790: if (mumps->use_petsc_omp_support) {
1791: PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1792: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1793: PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1794: }
1795: #endif
1796: PetscCall(PetscFree(mumps->ia_alloc));
1797: PetscCall(PetscFree(mumps->ja_alloc));
1798: PetscCall(PetscFree(mumps->recvcount));
1799: PetscCall(PetscFree(mumps->reqs));
1800: PetscCall(PetscFree(mumps->irhs_loc));
1801: PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1802: PetscCall(PetscFree(mumps->nest_vals));
1803: PetscCall(PetscFree(A->data));
1805: /* clear composed functions */
1806: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1807: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1808: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1809: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1810: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1811: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1812: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1813: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1814: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1815: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1816: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1817: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1818: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1819: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1820: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1821: PetscFunctionReturn(PETSC_SUCCESS);
1822: }
1824: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of right-hand sides. <array> points to the start of the RHS entries on the local processor. */
1825: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1826: {
1827: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1828: const PetscMPIInt ompsize = mumps->omp_comm_size;
1829: PetscInt i, m, M, rstart;
1831: PetscFunctionBegin;
1832: PetscCall(MatGetSize(A, &M, NULL));
1833: PetscCall(MatGetLocalSize(A, &m, NULL));
1834: PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1835: if (ompsize == 1) {
1836: if (!mumps->irhs_loc) {
1837: mumps->nloc_rhs = (PetscMUMPSInt)m;
1838: PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1839: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1840: for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1841: }
1842: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1843: } else {
1844: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1845: const PetscInt *ranges;
1846: PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks;
1847: MPI_Group petsc_group, omp_group;
1848: PetscScalar *recvbuf = NULL;
1850: if (mumps->is_omp_master) {
1851: /* Lazily initialize the OpenMP-related data structures for the distributed RHS */
1852: if (!mumps->irhs_loc) {
1853: PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1854: PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1855: PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1856: PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1857: for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1858: PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1860: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1861: mumps->nloc_rhs = 0;
1862: PetscCall(MatGetOwnershipRanges(A, &ranges));
1863: for (j = 0; j < ompsize; j++) {
1864: mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1865: mumps->nloc_rhs += mumps->rhs_nrow[j];
1866: }
1867: PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1868: for (j = k = 0; j < ompsize; j++) {
1869: for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1870: }
1872: PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1873: PetscCallMPI(MPI_Group_free(&petsc_group));
1874: PetscCallMPI(MPI_Group_free(&omp_group));
1875: }
1877: /* Reallocate buffers when the current nrhs is bigger than any previously encountered */
1878: if (nrhs > mumps->max_nrhs) {
1879: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1880: PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1881: mumps->max_nrhs = nrhs;
1882: }
1884: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1885: for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1886: mumps->rhs_disps[0] = 0;
1887: for (j = 1; j < ompsize; j++) {
1888: mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1889: PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1890: }
1891: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1892: }
1894: PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1895: PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1897: if (mumps->is_omp_master) {
1898: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1899: PetscScalar *dst, *dstbase = mumps->rhs_loc;
1900: for (j = 0; j < ompsize; j++) {
1901: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1902: dst = dstbase;
1903: for (i = 0; i < nrhs; i++) {
1904: PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1905: src += mumps->rhs_nrow[j];
1906: dst += mumps->nloc_rhs;
1907: }
1908: dstbase += mumps->rhs_nrow[j];
1909: }
1910: }
1911: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1912: }
1913: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1914: }
1915: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
1916: mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1917: mumps->id.lrhs_loc = mumps->nloc_rhs;
1918: mumps->id.irhs_loc = mumps->irhs_loc;
1919: PetscFunctionReturn(PETSC_SUCCESS);
1920: }
1922: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1923: {
1924: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1925: const PetscScalar *barray = NULL;
1926: PetscScalar *array;
1927: IS is_iden, is_petsc;
1928: PetscInt i;
1929: PetscBool second_solve = PETSC_FALSE;
1930: static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1932: PetscFunctionBegin;
1933: PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
1934: "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
1935: &cite1));
1936: PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
1937: "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
1938: &cite2));
1940: PetscCall(VecFlag(x, A->factorerrortype));
1941: if (A->factorerrortype) {
1942: PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1943: PetscFunctionReturn(PETSC_SUCCESS);
1944: }
1946: mumps->id.nrhs = 1;
1947: if (mumps->petsc_size > 1) {
1948: if (mumps->ICNTL20 == 10) {
1949: mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1950: PetscCall(VecGetArrayRead(b, &barray));
1951: PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1952: } else {
1953: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
1954: PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1955: PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1956: if (!mumps->myid) {
1957: PetscCall(VecGetArray(mumps->b_seq, &array));
1958: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959: }
1960: }
1961: } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1962: mumps->id.ICNTL(20) = 0;
1963: PetscCall(VecCopy(b, x));
1964: PetscCall(VecGetArray(x, &array));
1965: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1966: }
1968: /*
1969: handle condensation step of Schur complement (if any)
1970: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1971: According to the MUMPS (5.0.0) manual, any such value should be harmless during the factorization phase
1972: Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1973: This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1974: */
1975: if (mumps->id.size_schur > 0) {
1976: PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1977: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1978: second_solve = PETSC_TRUE;
1979: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1980: mumps->id.ICNTL(26) = 1; /* condensation phase */
1981: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1982: }
1984: mumps->id.job = JOB_SOLVE;
1985: PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1986: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1988: /* handle expansion step of Schur complement (if any) */
1989: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1990: else if (mumps->id.ICNTL(26) == 1) { // ICNTL(26) == 1: the solve returned the reduced (condensed) RHS; solve the Schur complement system and scatter its solution into the solution array
1991: PetscCall(MatMumpsSolveSchur_Private(A));
1992: for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1993: }
1995: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1996: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1997: /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1998: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1999: }
2000: if (!mumps->scat_sol) { /* create scatter scat_sol */
2001: PetscInt *isol2_loc = NULL;
2002: PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2003: PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2004: for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
2005: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2006: PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2007: PetscCall(ISDestroy(&is_iden));
2008: PetscCall(ISDestroy(&is_petsc));
2009: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2010: }
2012: PetscScalar *xarray;
2013: PetscCall(VecGetArray(mumps->x_seq, &xarray));
2014: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2015: PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2016: PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017: PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2019: if (mumps->ICNTL20 == 10) { // distributed RHS
2020: PetscCall(VecRestoreArrayRead(b, &barray));
2021: } else if (!mumps->myid) { // centralized RHS
2022: PetscCall(VecRestoreArray(mumps->b_seq, &array));
2023: }
2024: } else {
2025: // id.rhs has the solution in mumps precision
2026: PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2027: PetscCall(VecRestoreArray(x, &array));
2028: }
2030: PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2031: PetscFunctionReturn(PETSC_SUCCESS);
2032: }
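/* An illustrative usage sketch, not part of the MUMPS interface (error checking omitted; A, b, x are assumed to
   be defined elsewhere). A typical sequence that ends up in MatSolve_MUMPS() through the factor interface is:

     Mat           F;
     MatFactorInfo info;
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(MatDestroy(&F));

   The same path is reached from the command line with, e.g., -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps. */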
2034: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035: {
2036: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2037: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2039: PetscFunctionBegin;
2040: mumps->id.ICNTL(9) = 0;
2041: PetscCall(MatSolve_MUMPS(A, b, x));
2042: mumps->id.ICNTL(9) = value;
2043: PetscFunctionReturn(PETSC_SUCCESS);
2044: }
2046: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2047: {
2048: Mat Bt = NULL;
2049: PetscBool denseX, denseB, flg, flgT;
2050: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2051: PetscInt i, nrhs, M, nrhsM;
2052: PetscScalar *array;
2053: const PetscScalar *barray;
2054: PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0;
2055: PetscMUMPSInt *isol_loc, *isol_loc_save;
2056: PetscScalar *sol_loc;
2057: void *sol_loc_save;
2058: PetscCount sol_loc_len_save;
2059: IS is_to, is_from;
2060: PetscInt k, proc, j, m, myrstart;
2061: const PetscInt *rstart;
2062: Vec v_mpi, msol_loc;
2063: VecScatter scat_sol;
2064: Vec b_seq;
2065: VecScatter scat_rhs;
2066: PetscScalar *aa;
2067: PetscInt spnr, *ia, *ja;
2068: Mat_MPIAIJ *b = NULL;
2070: PetscFunctionBegin;
2071: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2072: PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2074: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2076: if (denseB) {
2077: PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2078: mumps->id.ICNTL(20) = 0; /* dense RHS */
2079: } else { /* sparse B */
2080: PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2081: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2082: PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2083: PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2084: /* input B is transpose of actual RHS matrix,
2085: because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2086: PetscCall(MatTransposeGetMat(B, &Bt));
2087: mumps->id.ICNTL(20) = 1; /* sparse RHS */
2088: }
2090: PetscCall(MatGetSize(B, &M, &nrhs));
2091: PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2092: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2093: mumps->id.lrhs = (PetscMUMPSInt)M;
2095: if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2096: PetscScalar *aa;
2097: PetscInt spnr, *ia, *ja;
2098: PetscBool second_solve = PETSC_FALSE;
2100: PetscCall(MatDenseGetArray(X, &array));
2101: if (denseB) {
2102: /* copy B to X */
2103: PetscCall(MatDenseGetArrayRead(B, &barray));
2104: PetscCall(PetscArraycpy(array, barray, nrhsM));
2105: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2106: } else { /* sparse B */
2107: PetscCall(MatSeqAIJGetArray(Bt, &aa));
2108: PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2109: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2110: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2111: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2112: }
2113: PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2115: /* handle condensation step of Schur complement (if any) */
2116: if (mumps->id.size_schur > 0) {
2117: if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2118: second_solve = PETSC_TRUE;
2119: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2120: mumps->id.ICNTL(26) = 1; /* condensation phase, i.e., compute the reduced RHS id.redrhs */
2121: } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2122: }
2124: mumps->id.job = JOB_SOLVE;
2125: PetscMUMPS_c(mumps);
2126: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2128: /* handle expansion step of Schur complement (if any) */
2129: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2130: else if (mumps->id.ICNTL(26) == 1) { // ICNTL(26) == 1: the solve returned the reduced (condensed) RHS; solve the Schur complement system and scatter its solution into the solution array
2131: PetscCall(MatMumpsSolveSchur_Private(A));
2132: for (j = 0; j < nrhs; ++j)
2133: for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2134: }
2136: if (!denseB) { /* sparse B, restore ia, ja */
2137: PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2138: PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2139: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2140: }
2142: // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2143: PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2144: PetscCall(MatDenseRestoreArray(X, &array));
2145: PetscFunctionReturn(PETSC_SUCCESS);
2146: }
2148: /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2149: PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2151: /* create msol_loc to hold mumps local solution */
2152: isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2153: sol_loc_save = mumps->id.sol_loc;
2154: sol_loc_len_save = mumps->id.sol_loc_len;
2155: mumps->id.isol_loc = NULL; // an init state
2156: mumps->id.sol_loc = NULL;
2157: mumps->id.sol_loc_len = 0;
2159: lsol_loc = mumps->id.lsol_loc;
2160: PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2161: PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2162: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2163: mumps->id.isol_loc = isol_loc;
2165: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2167: if (denseB) {
2168: if (mumps->ICNTL20 == 10) {
2169: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2170: PetscCall(MatDenseGetArrayRead(B, &barray));
2171: PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2172: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2173: PetscCall(MatGetLocalSize(B, &m, NULL));
2174: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2175: } else {
2176: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2177: /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here cannot be realized as an MPI_Gather, resulting in
2178: very inefficient communication. An optimization would be to use VecScatterCreateToZero() to gather B to rank 0, and then on rank
2179: 0 re-arrange B into the desired order, which is a local operation.
2180: */
2182: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2183: /* wrap dense rhs matrix B into a vector v_mpi */
2184: PetscCall(MatGetLocalSize(B, &m, NULL));
2185: PetscCall(MatDenseGetArrayRead(B, &barray));
2186: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2187: PetscCall(MatDenseRestoreArrayRead(B, &barray));
2189: /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2190: if (!mumps->myid) {
2191: PetscInt *idx;
2192: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2193: PetscCall(PetscMalloc1(nrhsM, &idx));
2194: PetscCall(MatGetOwnershipRanges(B, &rstart));
2195: for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2196: for (j = 0; j < nrhs; j++) {
2197: for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2198: }
2199: }
2201: PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2202: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2203: PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2204: } else {
2205: PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2206: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2207: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2208: }
2210: PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2211: PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2212: PetscCall(ISDestroy(&is_to));
2213: PetscCall(ISDestroy(&is_from));
2214: PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2216: if (!mumps->myid) { /* define rhs on the host */
2217: PetscCall(VecGetArrayRead(b_seq, &barray));
2218: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2219: PetscCall(VecRestoreArrayRead(b_seq, &barray));
2220: }
2221: }
2222: } else { /* sparse B */
2223: b = (Mat_MPIAIJ *)Bt->data;
2225: /* wrap dense X into a vector v_mpi */
2226: PetscCall(MatGetLocalSize(X, &m, NULL));
2227: PetscCall(MatDenseGetArrayRead(X, &barray));
2228: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2229: PetscCall(MatDenseRestoreArrayRead(X, &barray));
2231: if (!mumps->myid) {
2232: PetscCall(MatSeqAIJGetArray(b->A, &aa));
2233: PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2234: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2235: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2236: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2237: } else {
2238: mumps->id.irhs_ptr = NULL;
2239: mumps->id.irhs_sparse = NULL;
2240: mumps->id.nz_rhs = 0;
2241: if (mumps->id.rhs_sparse_len) {
2242: PetscCall(PetscFree(mumps->id.rhs_sparse));
2243: mumps->id.rhs_sparse_len = 0;
2244: }
2245: }
2246: }
2248: /* solve phase */
2249: mumps->id.job = JOB_SOLVE;
2250: PetscMUMPS_c(mumps);
2251: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2253: /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2254: PetscCall(MatDenseGetArray(X, &array));
2255: PetscCall(VecPlaceArray(v_mpi, array));
2257: /* create scatter scat_sol */
2258: PetscCall(MatGetOwnershipRanges(X, &rstart));
2259: /* iidx: index for scatter mumps solution to PETSc X */
2261: PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2262: PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2263: for (i = 0; i < lsol_loc; i++) {
2264: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2266: for (proc = 0; proc < mumps->petsc_size; proc++) {
2267: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2268: myrstart = rstart[proc];
2269: k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */
2270: iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */
2271: m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2272: break;
2273: }
2274: }
2276: for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2277: }
2278: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2279: PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2280: PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2281: PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2282: PetscCall(ISDestroy(&is_from));
2283: PetscCall(ISDestroy(&is_to));
2284: PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2285: PetscCall(MatDenseRestoreArray(X, &array));
2287: if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2288: mumps->id.sol_loc_len = 0;
2289: PetscCall(PetscFree(mumps->id.sol_loc));
2290: }
2292: // restore old values
2293: mumps->id.sol_loc = sol_loc_save;
2294: mumps->id.sol_loc_len = sol_loc_len_save;
2295: mumps->id.isol_loc = isol_loc_save;
2297: PetscCall(PetscFree2(sol_loc, isol_loc));
2298: PetscCall(PetscFree(idxx));
2299: PetscCall(VecDestroy(&msol_loc));
2300: PetscCall(VecDestroy(&v_mpi));
2301: if (!denseB) {
2302: if (!mumps->myid) {
2303: b = (Mat_MPIAIJ *)Bt->data;
2304: PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2305: PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2306: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2307: }
2308: } else {
2309: if (mumps->ICNTL20 == 0) {
2310: PetscCall(VecDestroy(&b_seq));
2311: PetscCall(VecScatterDestroy(&scat_rhs));
2312: }
2313: }
2314: PetscCall(VecScatterDestroy(&scat_sol));
2315: PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2316: PetscFunctionReturn(PETSC_SUCCESS);
2317: }
2319: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320: {
2321: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2322: const PetscMUMPSInt value = mumps->id.ICNTL(9);
2324: PetscFunctionBegin;
2325: mumps->id.ICNTL(9) = 0;
2326: PetscCall(MatMatSolve_MUMPS(A, B, X));
2327: mumps->id.ICNTL(9) = value;
2328: PetscFunctionReturn(PETSC_SUCCESS);
2329: }
2331: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332: {
2333: PetscBool flg;
2334: Mat B;
2336: PetscFunctionBegin;
2337: PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338: PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2340: /* Create B=Bt^T that uses Bt's data structure */
2341: PetscCall(MatCreateTranspose(Bt, &B));
2343: PetscCall(MatMatSolve_MUMPS(A, B, X));
2344: PetscCall(MatDestroy(&B));
2345: PetscFunctionReturn(PETSC_SUCCESS);
2346: }
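/* An illustrative usage sketch, not part of the MUMPS interface (error checking omitted; F is a MUMPS factor of A,
   Bt and X are assumed to be defined elsewhere). Because MUMPS wants sparse right-hand sides in compressed-column
   form, the multiple-RHS entry point takes the transpose Bt of the RHS matrix in AIJ format:

     PetscCall(MatMatTransposeSolve(F, Bt, X));   // solves A * X = Bt^T; X must be MATDENSE
*/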
2348: #if !defined(PETSC_USE_COMPLEX)
2349: /*
2350: input:
2351: F: numeric factor
2352: output:
2353: nneg: total number of negative pivots
2354: nzero: total number of zero pivots
2355: npos: (global dimension of F) - nneg - nzero
2356: */
2357: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358: {
2359: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2360: PetscMPIInt size;
2362: PetscFunctionBegin;
2363: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365: PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
2367: if (nneg) *nneg = mumps->id.INFOG(12);
2368: if (nzero || npos) {
2369: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2370: if (nzero) *nzero = mumps->id.INFOG(28);
2371: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372: }
2373: PetscFunctionReturn(PETSC_SUCCESS);
2374: }
2375: #endif
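/* An illustrative usage sketch, not part of the MUMPS interface (error checking omitted; pc is assumed to be a
   set-up PCCHOLESKY using MATSOLVERMUMPS, run with -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1 as required by
   the checks above):

     Mat      F;
     PetscInt nneg, nzero, npos;
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
*/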
2377: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2378: {
2379: PetscMPIInt nreqs;
2380: PetscMUMPSInt *irn, *jcn;
2381: PetscMPIInt count;
2382: PetscCount totnnz, remain;
2383: const PetscInt osize = mumps->omp_comm_size;
2384: PetscScalar *val;
2386: PetscFunctionBegin;
2387: if (osize > 1) {
2388: if (reuse == MAT_INITIAL_MATRIX) {
2389: /* master first gathers counts of nonzeros to receive */
2390: if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2391: PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2393: /* Then each computes number of send/recvs */
2394: if (mumps->is_omp_master) {
2395: /* Start from 1 since self communication is not done in MPI */
2396: nreqs = 0;
2397: for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2398: } else {
2399: nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2400: }
2401: PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2403: /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the other ranks.
2404: MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
2405: might even be a prime number > 2^31-1 (so it cannot be split via a derived datatype), we have to slice the messages. Since omp_comm_size
2406: is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2407: */
2408: nreqs = 0; /* counter for actual send/recvs */
2409: if (mumps->is_omp_master) {
2410: totnnz = 0;
2412: for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2413: PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2414: PetscCall(PetscMalloc1(totnnz, &val));
2416: /* Self communication */
2417: PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2418: PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2419: PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2421: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2422: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2423: PetscCall(PetscFree(mumps->val_alloc));
2424: mumps->nnz = totnnz;
2425: mumps->irn = irn;
2426: mumps->jcn = jcn;
2427: mumps->val = mumps->val_alloc = val;
2429: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2430: jcn += mumps->recvcount[0];
2431: val += mumps->recvcount[0];
2433: /* Remote communication */
2434: for (PetscMPIInt i = 1; i < osize; i++) {
2435: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2436: remain = mumps->recvcount[i] - count;
2437: while (count > 0) {
2438: PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439: PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2440: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2441: irn += count;
2442: jcn += count;
2443: val += count;
2444: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2445: remain -= count;
2446: }
2447: }
2448: } else {
2449: irn = mumps->irn;
2450: jcn = mumps->jcn;
2451: val = mumps->val;
2452: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453: remain = mumps->nnz - count;
2454: while (count > 0) {
2455: PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456: PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2457: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2458: irn += count;
2459: jcn += count;
2460: val += count;
2461: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2462: remain -= count;
2463: }
2464: }
2465: } else {
2466: nreqs = 0;
2467: if (mumps->is_omp_master) {
2468: val = mumps->val + mumps->recvcount[0];
2469: for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2470: count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2471: remain = mumps->recvcount[i] - count;
2472: while (count > 0) {
2473: PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2474: val += count;
2475: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2476: remain -= count;
2477: }
2478: }
2479: } else {
2480: val = mumps->val;
2481: count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2482: remain = mumps->nnz - count;
2483: while (count > 0) {
2484: PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2485: val += count;
2486: count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2487: remain -= count;
2488: }
2489: }
2490: }
2491: PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2492: mumps->tag++; /* It is fine for the above sends/receives to share one MPI tag */
2493: }
2494: PetscFunctionReturn(PETSC_SUCCESS);
2495: }
2497: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498: {
2499: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2501: PetscFunctionBegin;
2502: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* as originally written, the second test compared INFOG(1) with both -16 and 0 and could never hold; INFO(2) == 0 is presumably the intended companion check */
2503: if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504: PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505: PetscFunctionReturn(PETSC_SUCCESS);
2506: }
2508: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2511: /* numerical factorization phase */
2512: mumps->id.job = JOB_FACTNUMERIC;
2513: if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514: if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515: } else {
2516: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517: }
2519: if (F->schur) {
2520: const PetscScalar *array;
2521: MUMPS_INT size = mumps->id.size_schur;
2522: PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524: PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525: }
2527: PetscMUMPS_c(mumps);
2528: if (mumps->id.INFOG(1) < 0) {
2529: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2530: if (mumps->id.INFOG(1) == -10) {
2531: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2533: } else if (mumps->id.INFOG(1) == -13) {
2534: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2535: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2536: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2537: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2538: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2539: } else {
2540: PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2541: F->factorerrortype = MAT_FACTOR_OTHER;
2542: }
2543: }
2544: PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2546: F->assembled = PETSC_TRUE;
2548: if (F->schur) { /* reset Schur status to unfactored */
2549: #if defined(PETSC_HAVE_CUDA)
2550: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2551: #endif
2552: PetscScalar *array;
2553: PetscCall(MatDenseGetArray(F->schur, &array));
2554: PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2555: PetscCall(MatDenseRestoreArray(F->schur, &array));
2556: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2557: mumps->id.ICNTL(19) = 2;
2558: PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2559: }
2560: PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2561: }
2563: /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
2564: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2566: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2567: // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2568: // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2569: // which the solution (in SOL_loc) is available on the local processor.
2570: // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2571: // normally have the same contents for each of these calls. The only exception is the case of
2572: // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2573: // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2574: if (mumps->petsc_size > 1) {
2575: PetscInt lsol_loc;
2576: PetscScalar *array;
2578: /* distributed solution; Create x_seq=sol_loc for repeated use */
2579: if (mumps->x_seq) {
2580: PetscCall(VecScatterDestroy(&mumps->scat_sol));
2581: PetscCall(PetscFree(mumps->id.isol_loc));
2582: PetscCall(VecDestroy(&mumps->x_seq));
2583: }
2584: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2585: PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2586: PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2587: PetscCall(VecGetArray(mumps->x_seq, &array));
2588: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2589: PetscCall(VecRestoreArray(mumps->x_seq, &array));
2590: mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2591: }
2592: PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2593: PetscFunctionReturn(PETSC_SUCCESS);
2594: }
2596: /* Sets MUMPS options from the options database */
2597: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2598: {
2599: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2600: PetscReal cntl;
2601: PetscMUMPSInt icntl = 0, size, *listvar_schur;
2602: PetscInt info[80], i, ninfo = 80, rbs, cbs;
2603: PetscBool flg = PETSC_FALSE;
2604: PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2605: void *arr;
2607: PetscFunctionBegin;
2608: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2609: if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2610: PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2611: PetscInt nthreads = 0;
2612: PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2613: PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2614: PetscMUMPSInt nblk, *blkvar, *blkptr;
2616: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2617: PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2618: PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2620: PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2621: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2622: /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2623: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2624: if (mumps->use_petsc_omp_support) {
2625: PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2626: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2627: PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2628: PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2629: #else
2630: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2631: ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2632: #endif
2633: } else {
2634: mumps->omp_comm = PETSC_COMM_SELF;
2635: mumps->mumps_comm = mumps->petsc_comm;
2636: mumps->is_omp_master = PETSC_TRUE;
2637: }
2638: PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2639: mumps->reqs = NULL;
2640: mumps->tag = 0;
2642: if (mumps->mumps_comm != MPI_COMM_NULL) {
2643: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2644: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2645: MPI_Comm comm;
2646: PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2647: mumps->mumps_comm = comm;
2648: } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2649: }
2651: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2652: mumps->id.job = JOB_INIT;
2653:     mumps->id.par          = 1; /* host participates in factorization and solve */
2654: mumps->id.sym = mumps->sym;
2656: size = mumps->id.size_schur;
2657: arr = mumps->id.schur;
2658: listvar_schur = mumps->id.listvar_schur;
2659: nblk = mumps->id.nblk;
2660: blkvar = mumps->id.blkvar;
2661: blkptr = mumps->id.blkptr;
2662: if (PetscDefined(USE_DEBUG)) {
2663: for (PetscInt i = 0; i < size; i++)
2664: PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2665: A->rmap->N);
2666: }
2668: PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2669: PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2670:     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed when the requested precision differs from that of PetscScalar");
2671: PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
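    /* For example (assuming an empty solver prefix), running a double-precision PETSc build with
       -pc_precision single selects a single-precision MUMPS instance; this requires a MUMPS library
       built with mixed-precision support (PETSC_HAVE_MUMPS_MIXED_PRECISION), as checked above. */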
2673: PetscMUMPS_c(mumps);
2674: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2676: /* set PETSc-MUMPS default options - override MUMPS default */
2677: mumps->id.ICNTL(3) = 0;
2678: mumps->id.ICNTL(4) = 0;
2679: if (mumps->petsc_size == 1) {
2680: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2681: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
2682: } else {
2683: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2684: mumps->id.ICNTL(21) = 1; /* distributed solution */
2685: }
2686: if (nblk && blkptr) {
2687: mumps->id.ICNTL(15) = 1;
2688: mumps->id.nblk = nblk;
2689: mumps->id.blkvar = blkvar;
2690: mumps->id.blkptr = blkptr;
2691: } else mumps->id.ICNTL(15) = 0;
2693: /* restore cached ICNTL and CNTL values */
2694: for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2695: for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2697: PetscCall(PetscFree(mumps->ICNTL_pre));
2698: PetscCall(PetscFree(mumps->CNTL_pre));
2700: if (schur) {
2701: mumps->id.size_schur = size;
2702: mumps->id.schur_lld = size;
2703: mumps->id.schur = arr;
2704: mumps->id.listvar_schur = listvar_schur;
2705: if (mumps->petsc_size > 1) {
2706: PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2708: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2709: gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2710: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2711: PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2712: } else {
2713: if (F->factortype == MAT_FACTOR_LU) {
2714: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2715: } else {
2716: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2717: }
2718: }
2719: mumps->id.ICNTL(26) = -1;
2720: }
2722: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2723: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2724: */
2725: PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2726: PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2728: mumps->scat_rhs = NULL;
2729: mumps->scat_sol = NULL;
2730: }
2731: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2732: if (flg) mumps->id.ICNTL(1) = icntl;
2733: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2734: if (flg) mumps->id.ICNTL(2) = icntl;
2735: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2736: if (flg) mumps->id.ICNTL(3) = icntl;
2738: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2739: if (flg) mumps->id.ICNTL(4) = icntl;
2740:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2742:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2743: if (flg) mumps->id.ICNTL(6) = icntl;
2745: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2746: if (flg) {
2747: PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2748: mumps->id.ICNTL(7) = icntl;
2749: }
2751: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2752: /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2753: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2754: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2755: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2756: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2757: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2758: PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2759: if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2760: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2761: if (flg) {
2762: if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2763: else if (mumps->id.ICNTL(15) > 0) {
2764: const PetscInt *bsizes;
2765: PetscInt nblocks, p, *blkptr = NULL;
2766: PetscMPIInt *recvcounts, *displs, n;
2767: PetscMPIInt rank, size = 0;
2769: PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2770: flg = PETSC_TRUE;
2771: for (p = 0; p < nblocks; ++p) {
2772: if (bsizes[p] > 1) break;
2773: }
2774: if (p == nblocks) flg = PETSC_FALSE;
2775: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2776: if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
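        /* Gather each rank's block count and then its block sizes onto rank 0, and convert the
           concatenated sizes into 1-based block pointers (blkptr[0] = 1, blkptr[p+1] = blkptr[p] + size
           of block p), which is the layout passed to MatMumpsSetBlk() below. */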
2777: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2778: if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2779: PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2780: PetscCall(PetscMPIIntCast(nblocks, &n));
2781: PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2782: for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2783: PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2784: PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2785: PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2786: if (rank == 0) {
2787: blkptr[0] = 1;
2788:           for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p];
2789: PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2790: }
2791: PetscCall(PetscFree2(recvcounts, displs));
2792: PetscCall(PetscFree(blkptr));
2793: }
2794: }
2795: }
2796: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2797: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2798: PetscCall(MatDestroy(&F->schur));
2799: PetscCall(MatMumpsResetSchur_Private(mumps));
2800: }
2802: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2803: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2804: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2805: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2806: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2807:      In short, we could not use distributed RHS with MPICH until MPICH v4.0b1, or unless the workaround in MUMPS 5.6.2+ is enabled
2808: */
2809: mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2810: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2811: mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2812: #endif
2813: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2814: PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2815: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2816: PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2817: #endif
2818: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2820: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2821:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2822: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2823:   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
2825: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2826:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2827: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2828: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2829: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2830: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2831: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2832: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
2833: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2834: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2835: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2836: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2837: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2838: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2839: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2840: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2841: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2843: PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2844: if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2845: PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2846: if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2847: PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2848: if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2849: PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2850: if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2851: PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2852: if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2853: PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2854: if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2856: PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2858: PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2859: if (ninfo) {
2860:     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2861: PetscCall(PetscMalloc1(ninfo, &mumps->info));
2862: mumps->ninfo = ninfo;
2863: for (i = 0; i < ninfo; i++) {
2864:       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2865: mumps->info[i] = info[i];
2866: }
2867: }
2868: PetscOptionsEnd();
2869: PetscFunctionReturn(PETSC_SUCCESS);
2870: }
2872: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2873: {
2874: PetscFunctionBegin;
2875: if (mumps->id.INFOG(1) < 0) {
2876: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2877: if (mumps->id.INFOG(1) == -6) {
2878: PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2879: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2880: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2881: PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2882: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2883: } else {
2884: PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885: F->factorerrortype = MAT_FACTOR_OTHER;
2886: }
2887: }
2888: if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2889: PetscFunctionReturn(PETSC_SUCCESS);
2890: }
2892: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2893: {
2894: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2895: Vec b;
2896: const PetscInt M = A->rmap->N;
2898: PetscFunctionBegin;
2899: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2900: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2901: PetscFunctionReturn(PETSC_SUCCESS);
2902: }
2904: /* Set MUMPS options from the options database */
2905: PetscCall(MatSetFromOptions_MUMPS(F, A));
2907: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2908: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2910: /* analysis phase */
2911: mumps->id.job = JOB_FACTSYMBOLIC;
2912: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2913: switch (mumps->id.ICNTL(18)) {
2914: case 0: /* centralized assembled matrix input */
2915: if (!mumps->myid) {
2916: mumps->id.nnz = mumps->nnz;
2917: mumps->id.irn = mumps->irn;
2918: mumps->id.jcn = mumps->jcn;
2919: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2920: if (r && mumps->id.ICNTL(7) == 7) {
2921: mumps->id.ICNTL(7) = 1;
2922: if (!mumps->myid) {
2923: const PetscInt *idx;
2924: PetscInt i;
2926: PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2927: PetscCall(ISGetIndices(r, &idx));
2928: for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2929: PetscCall(ISRestoreIndices(r, &idx));
2930: }
2931: }
2932: }
2933: break;
2934: case 3: /* distributed assembled matrix input (size>1) */
2935: mumps->id.nnz_loc = mumps->nnz;
2936: mumps->id.irn_loc = mumps->irn;
2937: mumps->id.jcn_loc = mumps->jcn;
2938: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2939: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2940: PetscCall(MatCreateVecs(A, NULL, &b));
2941: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2942: PetscCall(VecDestroy(&b));
2943: }
2944: break;
2945: }
2946: PetscMUMPS_c(mumps);
2947: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2949: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2950: F->ops->solve = MatSolve_MUMPS;
2951: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2952: F->ops->matsolve = MatMatSolve_MUMPS;
2953: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2954: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2956: mumps->matstruc = SAME_NONZERO_PATTERN;
2957: PetscFunctionReturn(PETSC_SUCCESS);
2958: }
2960: /* Note the PETSc r and c permutations are ignored */
2961: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2962: {
2963: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2964: Vec b;
2965: const PetscInt M = A->rmap->N;
2967: PetscFunctionBegin;
2968: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2969: /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2970: PetscFunctionReturn(PETSC_SUCCESS);
2971: }
2973: /* Set MUMPS options from the options database */
2974: PetscCall(MatSetFromOptions_MUMPS(F, A));
2976: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2977: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2979: /* analysis phase */
2980: mumps->id.job = JOB_FACTSYMBOLIC;
2981: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2982: switch (mumps->id.ICNTL(18)) {
2983: case 0: /* centralized assembled matrix input */
2984: if (!mumps->myid) {
2985: mumps->id.nnz = mumps->nnz;
2986: mumps->id.irn = mumps->irn;
2987: mumps->id.jcn = mumps->jcn;
2988: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2989: }
2990: break;
2991: case 3: /* distributed assembled matrix input (size>1) */
2992: mumps->id.nnz_loc = mumps->nnz;
2993: mumps->id.irn_loc = mumps->irn;
2994: mumps->id.jcn_loc = mumps->jcn;
2995: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2996: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2997: PetscCall(MatCreateVecs(A, NULL, &b));
2998: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2999: PetscCall(VecDestroy(&b));
3000: }
3001: break;
3002: }
3003: PetscMUMPS_c(mumps);
3004: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3006: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
3007: F->ops->solve = MatSolve_MUMPS;
3008: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
3009: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3011: mumps->matstruc = SAME_NONZERO_PATTERN;
3012: PetscFunctionReturn(PETSC_SUCCESS);
3013: }
3015: /* Note the PETSc r permutation and factor info are ignored */
3016: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3017: {
3018: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3019: Vec b;
3020: const PetscInt M = A->rmap->N;
3022: PetscFunctionBegin;
3023: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3024: /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3025: PetscFunctionReturn(PETSC_SUCCESS);
3026: }
3028: /* Set MUMPS options from the options database */
3029: PetscCall(MatSetFromOptions_MUMPS(F, A));
3031: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3032: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3034: /* analysis phase */
3035: mumps->id.job = JOB_FACTSYMBOLIC;
3036: PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3037: switch (mumps->id.ICNTL(18)) {
3038: case 0: /* centralized assembled matrix input */
3039: if (!mumps->myid) {
3040: mumps->id.nnz = mumps->nnz;
3041: mumps->id.irn = mumps->irn;
3042: mumps->id.jcn = mumps->jcn;
3043: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3044: }
3045: break;
3046: case 3: /* distributed assembled matrix input (size>1) */
3047: mumps->id.nnz_loc = mumps->nnz;
3048: mumps->id.irn_loc = mumps->irn;
3049: mumps->id.jcn_loc = mumps->jcn;
3050: if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3051: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3052: PetscCall(MatCreateVecs(A, NULL, &b));
3053: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3054: PetscCall(VecDestroy(&b));
3055: }
3056: break;
3057: }
3058: PetscMUMPS_c(mumps);
3059: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3061: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3062: F->ops->solve = MatSolve_MUMPS;
3063: F->ops->solvetranspose = MatSolve_MUMPS;
3064: F->ops->matsolve = MatMatSolve_MUMPS;
3065: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
3066: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3067: #if defined(PETSC_USE_COMPLEX)
3068: F->ops->getinertia = NULL;
3069: #else
3070: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3071: #endif
3073: mumps->matstruc = SAME_NONZERO_PATTERN;
3074: PetscFunctionReturn(PETSC_SUCCESS);
3075: }
3077: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3078: {
3079: PetscBool isascii;
3080: PetscViewerFormat format;
3081: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3083: PetscFunctionBegin;
3084: /* check if matrix is mumps type */
3085: if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3087: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3088: if (isascii) {
3089: PetscCall(PetscViewerGetFormat(viewer, &format));
3090: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3091: PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3092: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3093: PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
3094: PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
3095: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
3096: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3097: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
3098: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
3099: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
3100: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
3101: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3102: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
3103: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
3104: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
3105: if (mumps->id.ICNTL(11) > 0) {
3106: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3107: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3108: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3109: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3110: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3111: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3112: }
3113: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
3114: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
3115: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3116: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
3117:         /* ICNTL(16-17) not used */
3118: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
3119: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
3120: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
3121: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
3122: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
3123: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3125: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
3126: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
3127: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
3128: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
3129: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
3130: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));
3132: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
3133:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (which factors may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
3134: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
3135: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
3136: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
3137: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
3138: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
3139: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
3140: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3141: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3142: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));
3144: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3145: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3146: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3147: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3148: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3149: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3151: /* information local to each processor */
3152: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3153: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3154: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3155: PetscCall(PetscViewerFlush(viewer));
3156: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3157: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3158: PetscCall(PetscViewerFlush(viewer));
3159: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3160: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3161: PetscCall(PetscViewerFlush(viewer));
3163:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size in MB of MUMPS internal data for running numerical factorization):\n"));
3164: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3165: PetscCall(PetscViewerFlush(viewer));
3167:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size in MB of MUMPS internal data used during numerical factorization):\n"));
3168: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3169: PetscCall(PetscViewerFlush(viewer));
3171: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3172: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3173: PetscCall(PetscViewerFlush(viewer));
3175: if (mumps->ninfo && mumps->ninfo <= 80) {
3176: PetscInt i;
3177: for (i = 0; i < mumps->ninfo; i++) {
3178: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3179: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3180: PetscCall(PetscViewerFlush(viewer));
3181: }
3182: }
3183: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3184: } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3186: if (mumps->myid == 0) { /* information from the host */
3187: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3188: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3189: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3190: PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3192: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3193: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3194: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3195: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3196: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3197: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3198: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3199:           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3200: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3201: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3202: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3203:           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3204: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3205: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3206: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3207: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3208: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3209: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3210: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3211: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3212: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3213: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3214: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3215: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3216: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3217: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3218: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3219: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3220: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3221: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3222: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3223: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3224: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3225: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3226: }
3227: }
3228: }
3229: PetscFunctionReturn(PETSC_SUCCESS);
3230: }
3232: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3233: {
3234: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3236: PetscFunctionBegin;
3237: info->block_size = 1.0;
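  /* MUMPS reports large entry counts as negative values expressed in millions, hence the -1000000 scaling below */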
3238: info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3239: info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3240: info->nz_unneeded = 0.0;
3241: info->assemblies = 0.0;
3242: info->mallocs = 0.0;
3243: info->memory = 0.0;
3244: info->fill_ratio_given = 0;
3245: info->fill_ratio_needed = 0;
3246: info->factor_mallocs = 0;
3247: PetscFunctionReturn(PETSC_SUCCESS);
3248: }
3250: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3251: {
3252: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3253: const PetscScalar *arr;
3254: const PetscInt *idxs;
3255: PetscInt size, i;
3257: PetscFunctionBegin;
3258: PetscCall(ISGetLocalSize(is, &size));
3259: /* Schur complement matrix */
3260: PetscCall(MatDestroy(&F->schur));
3261: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3262: PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3263:   // don't allocate mumps->id.schur[] now as its precision is not yet known
3264: PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3265: PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3266: PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3267: if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3269: /* MUMPS expects Fortran style indices */
3270: PetscCall(PetscFree(mumps->id.listvar_schur));
3271: PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3272: PetscCall(ISGetIndices(is, &idxs));
3273: for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3274: PetscCall(ISRestoreIndices(is, &idxs));
3275:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3276: if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3277: else mumps->ICNTL26 = -1;
3278: PetscFunctionReturn(PETSC_SUCCESS);
3279: }
3281: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3282: {
3283: Mat St;
3284: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3285: PetscScalar *array;
3286: PetscInt i, j, N = mumps->id.size_schur;
3288: PetscFunctionBegin;
3289: PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3290: PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3291: PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3292: PetscCall(MatSetType(St, MATDENSE));
3293: PetscCall(MatSetUp(St));
3294: PetscCall(MatDenseGetArray(St, &array));
3295:   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3296: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3297: for (i = 0; i < N; i++) {
3298: for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3299: }
3300: } else { /* stored by columns */
3301: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3302: }
3303: } else { /* either full or lower-triangular (not packed) */
3304: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3305: for (i = 0; i < N; i++) {
3306: for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3307: }
3308: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3309: PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3310: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3311: for (i = 0; i < N; i++) {
3312: for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3313: }
3314: }
3315: }
3316: PetscCall(MatDenseRestoreArray(St, &array));
3317: *S = St;
3318: PetscFunctionReturn(PETSC_SUCCESS);
3319: }
3321: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3322: {
3323: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3325: PetscFunctionBegin;
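  /* ICNTL_pre is laid out as {count, icntl_1, ival_1, icntl_2, ival_2, ...}: entry 0 holds the number of
     cached pairs, and each (icntl, ival) pair is replayed onto id.ICNTL() once MUMPS has been initialized
     (see the restore loop in MatSetFromOptions_MUMPS()) */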
3326: if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3327: PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3328: for (i = 0; i < nICNTL_pre; ++i)
3329: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3330: if (i == nICNTL_pre) { /* not already cached */
3331: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3332: else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3333: mumps->ICNTL_pre[0]++;
3334: }
3335: mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3336: PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3337: } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3338: PetscFunctionReturn(PETSC_SUCCESS);
3339: }
3341: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3342: {
3343: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3345: PetscFunctionBegin;
3346: if (mumps->id.job == JOB_NULL) {
3347: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3348: *ival = 0;
3349: for (i = 0; i < nICNTL_pre; ++i) {
3350: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3351: }
3352: } else *ival = mumps->id.ICNTL(icntl);
3353: PetscFunctionReturn(PETSC_SUCCESS);
3354: }
3356: /*@
3357: MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3359: Logically Collective
3361: Input Parameters:
3362: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3363: . icntl - index of MUMPS parameter array `ICNTL()`
3364: - ival - value of MUMPS `ICNTL(icntl)`
3366: Options Database Key:
3367: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3369: Level: beginner
3371: Note:
3372: Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3374: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3375: @*/
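/*
   Example usage, a minimal sketch (not part of the PETSc test suite): it assumes an existing PC `pc`
   of type `PCLU` built on an assembled matrix, and illustrates setting ICNTL(24) on the MUMPS factor
   before the factorization is set up.

     Mat F;
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc)); // create the MUMPS factor matrix F
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 24, 1));     // ICNTL(24): enable null pivot detection

   The same effect can be obtained from the command line with -mat_mumps_icntl_24 1.
*/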
3376: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3377: {
3378: PetscFunctionBegin;
3380: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3383: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3384: PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3385: PetscFunctionReturn(PETSC_SUCCESS);
3386: }
3388: /*@
3389: MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3391: Logically Collective
3393: Input Parameters:
3394: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3395: - icntl - index of MUMPS parameter array ICNTL()
3397: Output Parameter:
3398: . ival - value of MUMPS ICNTL(icntl)
3400: Level: beginner
3402: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3403: @*/
3404: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3405: {
3406: PetscFunctionBegin;
3408: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3410: PetscAssertPointer(ival, 3);
3411: PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3412: PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3413: PetscFunctionReturn(PETSC_SUCCESS);
3414: }
3416: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3417: {
3418: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3420: PetscFunctionBegin;
3421: if (mumps->id.job == JOB_NULL) {
3422: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3423: for (i = 0; i < nCNTL_pre; ++i)
3424: if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3425: if (i == nCNTL_pre) {
3426: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3427: else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3428: mumps->CNTL_pre[0]++;
3429: }
3430: mumps->CNTL_pre[1 + 2 * i] = icntl;
3431: mumps->CNTL_pre[2 + 2 * i] = val;
3432: } else ID_CNTL_SET(mumps->id, icntl, val);
3433: PetscFunctionReturn(PETSC_SUCCESS);
3434: }
3436: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3437: {
3438: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3440: PetscFunctionBegin;
3441: if (mumps->id.job == JOB_NULL) {
3442: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3443: *val = 0.0;
3444: for (i = 0; i < nCNTL_pre; ++i) {
3445: if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3446: }
3447: } else *val = ID_CNTL_GET(mumps->id, icntl);
3448: PetscFunctionReturn(PETSC_SUCCESS);
3449: }
3451: /*@
3452: MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3454: Logically Collective
3456: Input Parameters:
3457: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3458: . icntl - index of MUMPS parameter array `CNTL()`
3459: - val - value of MUMPS `CNTL(icntl)`
3461: Options Database Key:
3462: . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`
3464: Level: beginner
3466: Note:
3467: Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3469: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3470: @*/
3471: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3472: {
3473: PetscFunctionBegin;
3475: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3478: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3479: PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3480: PetscFunctionReturn(PETSC_SUCCESS);
3481: }
3483: /*@
3484: MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3486: Logically Collective
3488: Input Parameters:
3489: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3490: - icntl - index of MUMPS parameter array CNTL()
3492: Output Parameter:
3493: . val - value of MUMPS CNTL(icntl)
3495: Level: beginner
3497: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3498: @*/
3499: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3500: {
3501: PetscFunctionBegin;
3503: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3505: PetscAssertPointer(val, 3);
3506: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3507: PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3508: PetscFunctionReturn(PETSC_SUCCESS);
3509: }
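/*
   A minimal usage sketch of the CNTL interface, assuming Fmat is a MUMPS factor matrix obtained as in the
   ICNTL sketch above (the value 0.01 is only illustrative):
.vb
  PetscReal val;

  PetscCall(MatMumpsSetCntl(Fmat, 1, 0.01)); // CNTL(1): relative pivoting threshold
  PetscCall(MatMumpsGetCntl(Fmat, 1, &val));
.ve
*/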
3511: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3512: {
3513: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3515: PetscFunctionBegin;
3516: *info = mumps->id.INFO(icntl);
3517: PetscFunctionReturn(PETSC_SUCCESS);
3518: }
3520: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3521: {
3522: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3524: PetscFunctionBegin;
3525: *infog = mumps->id.INFOG(icntl);
3526: PetscFunctionReturn(PETSC_SUCCESS);
3527: }
3529: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3530: {
3531: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3533: PetscFunctionBegin;
3534: *rinfo = ID_RINFO_GET(mumps->id, icntl);
3535: PetscFunctionReturn(PETSC_SUCCESS);
3536: }
3538: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3539: {
3540: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3542: PetscFunctionBegin;
3543: *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3544: PetscFunctionReturn(PETSC_SUCCESS);
3545: }
3547: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3548: {
3549: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3551: PetscFunctionBegin;
3552: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3553: *size = 0;
3554: *array = NULL;
3555: if (!mumps->myid) {
3556: *size = mumps->id.INFOG(28);
3557: PetscCall(PetscMalloc1(*size, array));
3558: for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3559: }
3560: PetscFunctionReturn(PETSC_SUCCESS);
3561: }
3563: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3564: {
3565: Mat Bt = NULL, Btseq = NULL;
3566: PetscBool flg;
3567: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3568: PetscScalar *aa;
3569: PetscInt spnr, *ia, *ja, M, nrhs;
3571: PetscFunctionBegin;
3572: PetscAssertPointer(spRHS, 2);
3573: PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3574: PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3575: PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3576: PetscCall(MatTransposeGetMat(spRHS, &Bt));
3578: PetscCall(MatMumpsSetIcntl(F, 30, 1));
3580: if (mumps->petsc_size > 1) {
3581: Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3582: Btseq = b->A;
3583: } else {
3584: Btseq = Bt;
3585: }
3587: PetscCall(MatGetSize(spRHS, &M, &nrhs));
3588: mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3589: PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3590: mumps->id.rhs = NULL;
3592: if (!mumps->myid) {
3593: PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3594: PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3595: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3596: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3597: PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3598: } else {
3599: mumps->id.irhs_ptr = NULL;
3600: mumps->id.irhs_sparse = NULL;
3601: mumps->id.nz_rhs = 0;
3602: if (mumps->id.rhs_sparse_len) {
3603: PetscCall(PetscFree(mumps->id.rhs_sparse));
3604: mumps->id.rhs_sparse_len = 0;
3605: }
3606: }
3607: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3608: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3610: /* solve phase */
3611: mumps->id.job = JOB_SOLVE;
3612: PetscMUMPS_c(mumps);
3613: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3615: if (!mumps->myid) {
3616: PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3617: PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3618: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3619: }
3620: PetscFunctionReturn(PETSC_SUCCESS);
3621: }
3623: /*@
3624: MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3626: Logically Collective
3628: Input Parameter:
3629: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3631: Output Parameter:
3632: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the requested entries of the inverse of `A`
3634: Level: beginner
3636: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3637: @*/
3638: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3639: {
3640: PetscFunctionBegin;
3642: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3643: PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3644: PetscFunctionReturn(PETSC_SUCCESS);
3645: }
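/*
   A minimal sketch of requesting the diagonal entries of inv(A), assuming a single MPI rank and a factored
   MUMPS matrix F of an M x M matrix. The requested entries are described by the nonzero pattern of spRHST
   (here an identity pattern); the numerical values inserted below only serve to create that pattern.
.vb
  Mat spRHST, spRHS;

  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, M, M, 1, NULL, &spRHST));
  for (PetscInt i = 0; i < M; i++) PetscCall(MatSetValue(spRHST, i, i, 1.0, INSERT_VALUES)); // only the pattern matters
  PetscCall(MatAssemblyBegin(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS)); // on return, spRHST holds the requested entries of inv(A)
  PetscCall(MatDestroy(&spRHS));
  PetscCall(MatDestroy(&spRHST));
.ve
*/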
3647: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3648: {
3649: Mat spRHS;
3651: PetscFunctionBegin;
3652: PetscCall(MatCreateTranspose(spRHST, &spRHS));
3653: PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3654: PetscCall(MatDestroy(&spRHS));
3655: PetscFunctionReturn(PETSC_SUCCESS);
3656: }
3658: /*@
3659: MatMumpsGetInverseTranspose - Get user-specified set of entries in the inverse of the matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3661: Logically Collective
3663: Input Parameter:
3664: . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3666: Output Parameter:
3667: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$
3669: Level: beginner
3671: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3672: @*/
3673: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3674: {
3675: PetscBool flg;
3677: PetscFunctionBegin;
3679: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3680: PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3681: PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3682: PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3683: PetscFunctionReturn(PETSC_SUCCESS);
3684: }
3686: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3687: {
3688: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3690: PetscFunctionBegin;
3691: if (nblk) {
3692: PetscAssertPointer(blkptr, 4);
3693: PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3694: PetscCall(PetscFree(mumps->id.blkptr));
3695: PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3696: for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3697: // mumps->id.icntl[] might not have been allocated yet (that is done in MatSetFromOptions_MUMPS()), so we don't assign ICNTL(15) here.
3698: // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3699: // mumps->id.ICNTL(15) = 1;
3700: if (blkvar) {
3701: PetscCall(PetscFree(mumps->id.blkvar));
3702: PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3703: for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3704: }
3705: } else {
3706: PetscCall(PetscFree(mumps->id.blkptr));
3707: PetscCall(PetscFree(mumps->id.blkvar));
3708: // mumps->id.ICNTL(15) = 0;
3709: mumps->id.nblk = 0;
3710: }
3711: PetscFunctionReturn(PETSC_SUCCESS);
3712: }
3714: /*@
3715: MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3717: Not collective, only relevant on the first process of the MPI communicator
3719: Input Parameters:
3720: + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3721: . nblk - the number of blocks
3722: . blkvar - see the MUMPS documentation; `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk = 1, nblk`) holds the variables associated with block `iblk`
3723: - blkptr - array of size `nblk + 1`, starting at 1, storing the prefix sum of the block sizes
3725: Level: advanced
3727: .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3728: @*/
3729: PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3730: {
3731: PetscFunctionBegin;
3733: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3734: PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3735: PetscFunctionReturn(PETSC_SUCCESS);
3736: }
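/*
   A minimal sketch of declaring variable blocks, assuming F is a MUMPS factor matrix of a 6 x 6 matrix; the
   block sizes 2, 2, 2 are only illustrative. blkptr is 1-based as in MUMPS, and, as described in the MUMPS
   documentation, passing NULL for blkvar means block iblk contains the variables blkptr[iblk]..blkptr[iblk+1]-1.
.vb
  const PetscInt blkptr[] = {1, 3, 5, 7}; // prefix sum of the block sizes 2, 2, 2

  PetscCall(MatMumpsSetBlk(F, 3, NULL, blkptr));
.ve
   Combine this with -mat_mumps_icntl_15 1 so that MUMPS compresses the input matrix using these blocks.
*/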
3738: /*@
3739: MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3741: Logically Collective
3743: Input Parameters:
3744: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3745: - icntl - index of MUMPS parameter array INFO()
3747: Output Parameter:
3748: . ival - value of MUMPS INFO(icntl)
3750: Level: beginner
3752: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3753: @*/
3754: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3755: {
3756: PetscFunctionBegin;
3758: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3759: PetscAssertPointer(ival, 3);
3760: PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3761: PetscFunctionReturn(PETSC_SUCCESS);
3762: }
3764: /*@
3765: MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3767: Logically Collective
3769: Input Parameters:
3770: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3771: - icntl - index of MUMPS parameter array INFOG()
3773: Output Parameter:
3774: . ival - value of MUMPS INFOG(icntl)
3776: Level: beginner
3778: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3779: @*/
3780: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3781: {
3782: PetscFunctionBegin;
3784: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3785: PetscAssertPointer(ival, 3);
3786: PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3787: PetscFunctionReturn(PETSC_SUCCESS);
3788: }
3790: /*@
3791: MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3793: Logically Collective
3795: Input Parameters:
3796: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3797: - icntl - index of MUMPS parameter array RINFO()
3799: Output Parameter:
3800: . val - value of MUMPS RINFO(icntl)
3802: Level: beginner
3804: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3805: @*/
3806: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3807: {
3808: PetscFunctionBegin;
3810: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3811: PetscAssertPointer(val, 3);
3812: PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3813: PetscFunctionReturn(PETSC_SUCCESS);
3814: }
3816: /*@
3817: MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3819: Logically Collective
3821: Input Parameters:
3822: + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3823: - icntl - index of MUMPS parameter array RINFOG()
3825: Output Parameter:
3826: . val - value of MUMPS RINFOG(icntl)
3828: Level: beginner
3830: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3831: @*/
3832: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3833: {
3834: PetscFunctionBegin;
3836: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3837: PetscAssertPointer(val, 3);
3838: PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3839: PetscFunctionReturn(PETSC_SUCCESS);
3840: }
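/*
   A minimal sketch of querying MUMPS statistics after factorization, assuming Fmat is a factored MUMPS matrix;
   see the MUMPS users' guide for the meaning of each INFO/INFOG/RINFO/RINFOG entry (INFOG(1) is the global
   return status and RINFOG(3) the number of floating-point operations of the factorization).
.vb
  PetscInt  infog1;
  PetscReal flops;

  PetscCall(MatMumpsGetInfog(Fmat, 1, &infog1)); // a negative value indicates an error
  PetscCall(MatMumpsGetRinfog(Fmat, 3, &flops));
.ve
*/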
3842: /*@
3843: MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3845: Logically Collective
3847: Input Parameter:
3848: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3850: Output Parameters:
3851: + size - local size of the array; it is non-zero only on MPI rank 0
3852: - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3853: for freeing it, for example with `PetscFree()`.
3855: Level: beginner
3857: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3858: @*/
3859: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3860: {
3861: PetscFunctionBegin;
3863: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3864: PetscAssertPointer(size, 2);
3865: PetscAssertPointer(array, 3);
3866: PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3867: PetscFunctionReturn(PETSC_SUCCESS);
3868: }
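/*
   A minimal sketch of retrieving the null pivot rows, assuming Fmat is a MUMPS factor matrix that was factored
   with -mat_mumps_icntl_24 1 (null pivot detection); the list is non-empty only on MPI rank 0.
.vb
  PetscInt nnull, *rows;

  PetscCall(MatMumpsGetNullPivots(Fmat, &nnull, &rows));
  for (PetscInt i = 0; i < nnull; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "null pivot row %" PetscInt_FMT "\n", rows[i]));
  PetscCall(PetscFree(rows));
.ve
*/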
3870: /*MC
3871: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
3872: MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3874: Works with `MATAIJ` and `MATSBAIJ` matrices
3876: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3878: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3879: See details below.
3881: Use `-pc_type cholesky` (or `-pc_type lu`) together with `-pc_factor_mat_solver_type mumps` to use this direct solver
3883: Options Database Keys:
3884: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3885: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3886: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
3887: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
3888: . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
3889: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3890: Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3891: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
3892: . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3893: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3894: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3895: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3896: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3897: . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3898: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3899: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3900: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3901: . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that can be allocated per processor
3902: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3903: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3904: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
3905: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3906: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3907: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3908: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3909: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3910: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3911: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3912: . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3913: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3914: . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3915: . -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3916: . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3917: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
3918: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
3919: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3920: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3921: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3922: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3923: - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3924: The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
3926: Level: beginner
3928: Notes:
3929: MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so attempting to use it
3930: on a Hermitian matrix generates an error.
3932: When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3933: `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3935: When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3936: the failure with
3937: .vb
3938: KSPGetPC(ksp,&pc);
3939: PCFactorGetMatrix(pc,&mat);
3940: MatMumpsGetInfo(mat,....);
3941: MatMumpsGetInfog(mat,....); etc.
3942: .ve
3943: Or run with `-ksp_error_if_not_converged`, in which case the program stops and the information is printed in the error message.
3945: MUMPS provides 64-bit integer support in two build modes:
3946: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3947: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3949: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3950: MUMPS stores column indices as 32-bit integers but row offsets as 64-bit integers, so you can have a huge number of non-zeros but must have fewer than 2^31 rows and
3951: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3952: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3954: With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3956: Two modes to run MUMPS/PETSc with OpenMP
3957: .vb
3958: Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3959: threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3960: .ve
3962: .vb
3963: `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3964: if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3965: .ve
3967: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3968: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3969: (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3970: libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3971: (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3973: If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3974: processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3975: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3976: are called slave ranks (or slaves). Only the master ranks are visible to MUMPS; the slaves are not. We free the CPUs assigned to slaves (which might be set
3977: by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs.
3978: On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
3979: If you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind
3980: MPI ranks to cores, then with `-mat_mumps_use_omp_threads 16` a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
3981: cores on socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
3982: problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3983: For example, with the Slurm job scheduler, one can use "srun --cpu-bind=verbose -m block:block" to map consecutive MPI ranks to sockets and
3984: examine the resulting mapping.
3986: PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3987: for example, "export OMP_PLACES=threads" and "export OMP_PROC_BIND=spread". One does not need to export `OMP_NUM_THREADS=m` in job scripts as PETSc
3988: calls `omp_set_num_threads(m)` internally before calling MUMPS.
3990: See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3992: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3993: M*/
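/*
   A minimal sketch of solving a linear system with MUMPS through KSP/PC, assuming a matrix A and vectors b, x
   already exist; this is equivalent to running with -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps.
.vb
  KSP ksp;
  PC  pc;

  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, A));
  PetscCall(KSPSetType(ksp, KSPPREONLY));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCLU));
  PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
  PetscCall(KSPSetFromOptions(ksp)); // optionally override settings from the options database
  PetscCall(KSPSolve(ksp, b, x));
  PetscCall(KSPDestroy(&ksp));
.ve
*/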
3995: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3996: {
3997: PetscFunctionBegin;
3998: *type = MATSOLVERMUMPS;
3999: PetscFunctionReturn(PETSC_SUCCESS);
4000: }
4002: /* MatGetFactor for Seq and MPI AIJ matrices */
4003: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
4004: {
4005: Mat B;
4006: Mat_MUMPS *mumps;
4007: PetscBool isSeqAIJ, isDiag, isDense;
4008: PetscMPIInt size;
4010: PetscFunctionBegin;
4011: #if defined(PETSC_USE_COMPLEX)
4012: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4013: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4014: *F = NULL;
4015: PetscFunctionReturn(PETSC_SUCCESS);
4016: }
4017: #endif
4018: /* Create the factorization matrix */
4019: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4020: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4021: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4022: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4023: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4024: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4025: PetscCall(MatSetUp(B));
4027: PetscCall(PetscNew(&mumps));
4029: B->ops->view = MatView_MUMPS;
4030: B->ops->getinfo = MatGetInfo_MUMPS;
4032: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4033: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4034: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4035: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4036: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4037: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4038: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4039: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4040: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4041: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4042: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4043: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4044: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4045: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4046: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4048: if (ftype == MAT_FACTOR_LU) {
4049: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4050: B->factortype = MAT_FACTOR_LU;
4051: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4052: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4053: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4054: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4055: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4056: mumps->sym = 0;
4057: } else {
4058: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4059: B->factortype = MAT_FACTOR_CHOLESKY;
4060: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4061: else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4062: else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4063: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4064: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4065: #if defined(PETSC_USE_COMPLEX)
4066: mumps->sym = 2;
4067: #else
4068: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4069: else mumps->sym = 2;
4070: #endif
4071: }
4073: /* set solvertype */
4074: PetscCall(PetscFree(B->solvertype));
4075: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4076: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4077: if (size == 1) {
4078: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4079: B->canuseordering = PETSC_TRUE;
4080: }
4081: B->ops->destroy = MatDestroy_MUMPS;
4082: B->data = (void *)mumps;
4084: *F = B;
4085: mumps->id.job = JOB_NULL;
4086: mumps->ICNTL_pre = NULL;
4087: mumps->CNTL_pre = NULL;
4088: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4089: PetscFunctionReturn(PETSC_SUCCESS);
4090: }
4092: /* MatGetFactor for Seq and MPI SBAIJ matrices */
4093: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4094: {
4095: Mat B;
4096: Mat_MUMPS *mumps;
4097: PetscBool isSeqSBAIJ;
4098: PetscMPIInt size;
4100: PetscFunctionBegin;
4101: #if defined(PETSC_USE_COMPLEX)
4102: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4103: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4104: *F = NULL;
4105: PetscFunctionReturn(PETSC_SUCCESS);
4106: }
4107: #endif
4108: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4109: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4110: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4111: PetscCall(MatSetUp(B));
4113: PetscCall(PetscNew(&mumps));
4114: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4115: if (isSeqSBAIJ) {
4116: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4117: } else {
4118: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4119: }
4121: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4122: B->ops->view = MatView_MUMPS;
4123: B->ops->getinfo = MatGetInfo_MUMPS;
4125: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4126: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4127: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4128: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4129: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4130: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4131: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4132: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4133: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4134: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4135: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4136: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4137: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4138: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4139: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4141: B->factortype = MAT_FACTOR_CHOLESKY;
4142: #if defined(PETSC_USE_COMPLEX)
4143: mumps->sym = 2;
4144: #else
4145: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4146: else mumps->sym = 2;
4147: #endif
4149: /* set solvertype */
4150: PetscCall(PetscFree(B->solvertype));
4151: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4152: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4153: if (size == 1) {
4154: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4155: B->canuseordering = PETSC_TRUE;
4156: }
4157: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4158: B->ops->destroy = MatDestroy_MUMPS;
4159: B->data = (void *)mumps;
4161: *F = B;
4162: mumps->id.job = JOB_NULL;
4163: mumps->ICNTL_pre = NULL;
4164: mumps->CNTL_pre = NULL;
4165: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4166: PetscFunctionReturn(PETSC_SUCCESS);
4167: }
4169: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4170: {
4171: Mat B;
4172: Mat_MUMPS *mumps;
4173: PetscBool isSeqBAIJ;
4174: PetscMPIInt size;
4176: PetscFunctionBegin;
4177: /* Create the factorization matrix */
4178: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4179: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4180: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4181: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4182: PetscCall(MatSetUp(B));
4184: PetscCall(PetscNew(&mumps));
4185: PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4186: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4187: B->factortype = MAT_FACTOR_LU;
4188: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4189: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4190: mumps->sym = 0;
4191: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4193: B->ops->view = MatView_MUMPS;
4194: B->ops->getinfo = MatGetInfo_MUMPS;
4196: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4197: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4198: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4199: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4200: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4201: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4202: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4203: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4204: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4205: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4206: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4207: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4208: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4209: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4210: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4212: /* set solvertype */
4213: PetscCall(PetscFree(B->solvertype));
4214: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4215: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4216: if (size == 1) {
4217: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4218: B->canuseordering = PETSC_TRUE;
4219: }
4220: B->ops->destroy = MatDestroy_MUMPS;
4221: B->data = (void *)mumps;
4223: *F = B;
4224: mumps->id.job = JOB_NULL;
4225: mumps->ICNTL_pre = NULL;
4226: mumps->CNTL_pre = NULL;
4227: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4228: PetscFunctionReturn(PETSC_SUCCESS);
4229: }
4231: /* MatGetFactor for Seq and MPI SELL matrices */
4232: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4233: {
4234: Mat B;
4235: Mat_MUMPS *mumps;
4236: PetscBool isSeqSELL;
4237: PetscMPIInt size;
4239: PetscFunctionBegin;
4240: /* Create the factorization matrix */
4241: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4242: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4243: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4244: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4245: PetscCall(MatSetUp(B));
4247: PetscCall(PetscNew(&mumps));
4249: B->ops->view = MatView_MUMPS;
4250: B->ops->getinfo = MatGetInfo_MUMPS;
4252: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4253: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4254: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4255: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4256: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4257: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4258: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4259: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4260: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4261: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4262: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4263: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4265: PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4266: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4267: B->factortype = MAT_FACTOR_LU;
4268: PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4269: mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4270: mumps->sym = 0;
4271: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4273: /* set solvertype */
4274: PetscCall(PetscFree(B->solvertype));
4275: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4276: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4277: if (size == 1) {
4278: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4279: B->canuseordering = PETSC_TRUE;
4280: }
4281: B->ops->destroy = MatDestroy_MUMPS;
4282: B->data = (void *)mumps;
4284: *F = B;
4285: mumps->id.job = JOB_NULL;
4286: mumps->ICNTL_pre = NULL;
4287: mumps->CNTL_pre = NULL;
4288: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4289: PetscFunctionReturn(PETSC_SUCCESS);
4290: }
4292: /* MatGetFactor for MATNEST matrices */
4293: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4294: {
4295: Mat B, **mats;
4296: Mat_MUMPS *mumps;
4297: PetscInt nr, nc;
4298: PetscMPIInt size;
4299: PetscBool flg = PETSC_TRUE;
4301: PetscFunctionBegin;
4302: #if defined(PETSC_USE_COMPLEX)
4303: if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4304: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4305: *F = NULL;
4306: PetscFunctionReturn(PETSC_SUCCESS);
4307: }
4308: #endif
4310: /* Return if some condition is not satisfied */
4311: *F = NULL;
4312: PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4313: if (ftype == MAT_FACTOR_CHOLESKY) {
4314: IS *rows, *cols;
4315: PetscInt *m, *M;
4317: PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4318: PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4319: PetscCall(MatNestGetISs(A, rows, cols));
4320: for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4321: if (!flg) {
4322: PetscCall(PetscFree2(rows, cols));
4323: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4324: PetscFunctionReturn(PETSC_SUCCESS);
4325: }
4326: PetscCall(PetscMalloc2(nr, &m, nr, &M));
4327: for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
4328: for (PetscInt r = 0; flg && r < nr; r++)
4329: for (PetscInt k = r + 1; flg && k < nr; k++)
4330: if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4331: PetscCall(PetscFree2(m, M));
4332: PetscCall(PetscFree2(rows, cols));
4333: if (!flg) {
4334: PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4335: PetscFunctionReturn(PETSC_SUCCESS);
4336: }
4337: }
4339: for (PetscInt r = 0; r < nr; r++) {
4340: for (PetscInt c = 0; c < nc; c++) {
4341: Mat sub = mats[r][c];
4342: PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
4344: if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4345: PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4346: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4347: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4348: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4349: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4350: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4351: PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4352: PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4353: PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4354: if (ftype == MAT_FACTOR_CHOLESKY) {
4355: if (r == c) {
4356: if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4357: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4358: flg = PETSC_FALSE;
4359: }
4360: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4361: PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4362: flg = PETSC_FALSE;
4363: }
4364: } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4365: PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4366: flg = PETSC_FALSE;
4367: }
4368: }
4369: }
4370: if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
4372: /* Create the factorization matrix */
4373: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4374: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4375: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4376: PetscCall(MatSetUp(B));
4378: PetscCall(PetscNew(&mumps));
4380: B->ops->view = MatView_MUMPS;
4381: B->ops->getinfo = MatGetInfo_MUMPS;
4383: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4384: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4385: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4386: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4387: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4388: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4389: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4390: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4391: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4392: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4393: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4394: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4395: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4396: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4397: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4399: if (ftype == MAT_FACTOR_LU) {
4400: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4401: B->factortype = MAT_FACTOR_LU;
4402: mumps->sym = 0;
4403: } else {
4404: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4405: B->factortype = MAT_FACTOR_CHOLESKY;
4406: #if defined(PETSC_USE_COMPLEX)
4407: mumps->sym = 2;
4408: #else
4409: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4410: else mumps->sym = 2;
4411: #endif
4412: }
4413: mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4414: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
4416: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4417: if (size == 1) {
4418: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4419: B->canuseordering = PETSC_TRUE;
4420: }
4422: /* set solvertype */
4423: PetscCall(PetscFree(B->solvertype));
4424: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4425: B->ops->destroy = MatDestroy_MUMPS;
4426: B->data = (void *)mumps;
4428: *F = B;
4429: mumps->id.job = JOB_NULL;
4430: mumps->ICNTL_pre = NULL;
4431: mumps->CNTL_pre = NULL;
4432: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
4433: PetscFunctionReturn(PETSC_SUCCESS);
4434: }
4436: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4437: {
4438: PetscFunctionBegin;
4439: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4440: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4441: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4442: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4443: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4444: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4445: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4446: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4447: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4448: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4449: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4450: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4451: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4452: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4453: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4454: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4455: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4456: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4457: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4458: PetscFunctionReturn(PETSC_SUCCESS);
4459: }