Actual source code: mumps.c

  1: /*
  2:     Provides an interface to the MUMPS sparse solver
  3: */
  4: #include <petscpkg_version.h>
  5: #include <petscsf.h>
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
  8: #include <../src/mat/impls/sell/mpi/mpisell.h>
  9: #include <petsc/private/vecimpl.h>

 11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

 13: EXTERN_C_BEGIN
 14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
 15:   #include <cmumps_c.h>
 16:   #include <zmumps_c.h>
 17:   #include <smumps_c.h>
 18:   #include <dmumps_c.h>
 19: #else
 20:   #if defined(PETSC_USE_COMPLEX)
 21:     #if defined(PETSC_USE_REAL_SINGLE)
 22:       #include <cmumps_c.h>
 23:       #define MUMPS_c     cmumps_c
 24:       #define MumpsScalar CMUMPS_COMPLEX
 25:     #else
 26:       #include <zmumps_c.h>
 27:       #define MUMPS_c     zmumps_c
 28:       #define MumpsScalar ZMUMPS_COMPLEX
 29:     #endif
 30:   #else
 31:     #if defined(PETSC_USE_REAL_SINGLE)
 32:       #include <smumps_c.h>
 33:       #define MUMPS_c     smumps_c
 34:       #define MumpsScalar SMUMPS_REAL
 35:     #else
 36:       #include <dmumps_c.h>
 37:       #define MUMPS_c     dmumps_c
 38:       #define MumpsScalar DMUMPS_REAL
 39:     #endif
 40:   #endif
 41: #endif
 42: #if defined(PETSC_USE_COMPLEX)
 43:   #if defined(PETSC_USE_REAL_SINGLE)
 44:     #define MUMPS_STRUC_C CMUMPS_STRUC_C
 45:   #else
 46:     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
 47:   #endif
 48: #else
 49:   #if defined(PETSC_USE_REAL_SINGLE)
 50:     #define MUMPS_STRUC_C SMUMPS_STRUC_C
 51:   #else
 52:     #define MUMPS_STRUC_C DMUMPS_STRUC_C
 53:   #endif
 54: #endif
 55: EXTERN_C_END

 57: #define JOB_INIT         -1
 58: #define JOB_NULL         0
 59: #define JOB_FACTSYMBOLIC 1
 60: #define JOB_FACTNUMERIC  2
 61: #define JOB_SOLVE        3
 62: #define JOB_END          -2

 64: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
 65:    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
 66:    naming convention in PetscMPIInt, PetscBLASInt etc.
 67: */
 68: typedef MUMPS_INT PetscMUMPSInt;

 70: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
 71:   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
 72:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 73:   #endif
 74: #else
 75:   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
 76:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 77:   #endif
 78: #endif

 80: #define MPIU_MUMPSINT       MPI_INT
 81: #define PETSC_MUMPS_INT_MAX 2147483647
 82: #define PETSC_MUMPS_INT_MIN -2147483648

 84: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
 85: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
 86: {
 87:   PetscFunctionBegin;
 88: #if PetscDefined(USE_64BIT_INDICES)
 89:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
 90: #endif
 91:   *b = (PetscMUMPSInt)a;
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }

 95: /* Put these utility routines here since they are only used in this file */
 96: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
 97: {
 98:   PetscInt  myval;
 99:   PetscBool myset;

101:   PetscFunctionBegin;
102:   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103:   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104:   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105:   if (set) *set = myset;
106:   PetscFunctionReturn(PETSC_SUCCESS);
107: }
108: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
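// A typical use, roughly following the options handling later in this file (a sketch, not verbatim):
//   PetscMUMPSInt icntl;
//   PetscBool     flg;
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
//   if (flg) mumps->id.ICNTL(4) = icntl;
// Values outside the [lb, ub] range are rejected inside PetscOptionsInt_Private(), so the subsequent PetscMUMPSIntCast() cannot overflow.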

110: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111: //
112: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113: // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
114: // arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
115: //
116: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
117: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118: // Also note that we added some *_len fields not in the specific types to track the sizes of those MumpsScalar buffers.
119: typedef struct {
120:   PetscPrecision precision;   // precision used by MUMPS
121:   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

123:   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124:   MUMPS_INT  sym, par, job;
125:   MUMPS_INT  comm_fortran; /* Fortran communicator */
126:   MUMPS_INT *icntl;
127:   void      *cntl; // MumpsReal, fixed size array
128:   MUMPS_INT  n;
129:   MUMPS_INT  nblk;

131:   /* Assembled entry */
132:   MUMPS_INT8 nnz;
133:   MUMPS_INT *irn;
134:   MUMPS_INT *jcn;
135:   void      *a; // MumpsScalar, centralized input
136:   PetscCount a_len;

138:   /* Distributed entry */
139:   MUMPS_INT8 nnz_loc;
140:   MUMPS_INT *irn_loc;
141:   MUMPS_INT *jcn_loc;
142:   void      *a_loc; // MumpsScalar, distributed input
143:   PetscCount a_loc_len;

145:   /* Matrix by blocks */
146:   MUMPS_INT *blkptr;
147:   MUMPS_INT *blkvar;

149:   /* Ordering, if given by user */
150:   MUMPS_INT *perm_in;

152:   /* RHS, solution, output data and statistics */
153:   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
154:   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

156:   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157:   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158:   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159:   MUMPS_INT  schur_lld;
160:   MUMPS_INT *info, *infog;   // fixed size array
161:   void      *rinfo, *rinfog; // MumpsReal, fixed size array

163:   /* Null space */
164:   MUMPS_INT *pivnul_list; // allocated by MUMPS!
165:   MUMPS_INT *mapping;     // allocated by MUMPS!

167:   /* Schur */
168:   MUMPS_INT  size_schur;
169:   MUMPS_INT *listvar_schur;
170:   void      *schur; // MumpsScalar
171:   PetscCount schur_len;

173:   /* For out-of-core */
174:   char *ooc_tmpdir; // fixed size array
175:   char *ooc_prefix; // fixed size array
176: } XMUMPS_STRUC_C;

178: // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
179: #define AllocateInternalID(MUMPS_STRUC_T, outer) \
180:   do { \
181:     MUMPS_STRUC_T *inner; \
182:     PetscCall(PetscNew(&inner)); \
183:     outer->icntl      = inner->icntl; \
184:     outer->cntl       = inner->cntl; \
185:     outer->info       = inner->info; \
186:     outer->infog      = inner->infog; \
187:     outer->rinfo      = inner->rinfo; \
188:     outer->rinfog     = inner->rinfog; \
189:     outer->ooc_tmpdir = inner->ooc_tmpdir; \
190:     outer->ooc_prefix = inner->ooc_prefix; \
191:     /* these three fields should never change after init */ \
192:     inner->comm_fortran = outer->comm_fortran; \
193:     inner->par          = outer->par; \
194:     inner->sym          = outer->sym; \
195:     outer->internal_id  = inner; \
196:   } while (0)

198: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200: {
201:   PetscFunctionBegin;
202:   outer->precision = precision;
203: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204:   #if defined(PETSC_USE_COMPLEX)
205:   if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
206:   else AllocateInternalID(ZMUMPS_STRUC_C, outer);
207:   #else
208:   if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
209:   else AllocateInternalID(DMUMPS_STRUC_C, outer);
210:   #endif
211: #else
212:   AllocateInternalID(MUMPS_STRUC_C, outer);
213: #endif
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }

217: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218:   do { \
219:     MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220:     PetscCall(PetscFree(inner->a)); \
221:     PetscCall(PetscFree(inner->a_loc)); \
222:     PetscCall(PetscFree(inner->redrhs)); \
223:     PetscCall(PetscFree(inner->rhs)); \
224:     PetscCall(PetscFree(inner->rhs_sparse)); \
225:     PetscCall(PetscFree(inner->rhs_loc)); \
226:     PetscCall(PetscFree(inner->sol_loc)); \
227:     PetscCall(PetscFree(inner->schur)); \
228:   } while (0)

230: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231: {
232:   PetscFunctionBegin;
233:   if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
234: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235:     const PetscPrecision mumps_precision = outer->precision;
236:     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237:   #if defined(PETSC_USE_COMPLEX)
238:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239:       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240:   #else
241:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242:       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243:   #endif
244:     }
245: #endif
246:     PetscCall(PetscFree(outer->internal_id));
247:   }
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254: // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255: //
256: // The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same.
257: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259: //
260: //
261: //   Input parameters:
262: // + convert   - whether to do PetscScalar to MumpsScalar conversion
263: // . n         - length of the PetscScalar array
264: // . pa        - [n], points to the PetscScalar array
265: // . precision - precision of MumpsScalar
266: // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267: // - ma        - on input, an existing MumpsScalar array if any.
268: //
269: //   Output parameters:
270: // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271: // - ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272: //
273: //   Note:
274: //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276: {
277:   PetscFunctionBegin;
278: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279:   const PetscPrecision mumps_precision = precision;
280:   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
281:   #if defined(PETSC_USE_COMPLEX)
282:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284:       if (*m < n) {
285:         PetscCall(PetscFree(*ma));
286:         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287:         *m = n;
288:       }
289:       if (convert) {
290:         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291:         for (PetscCount i = 0; i < n; i++) {
292:           b[i].r = PetscRealPart(pa[i]);
293:           b[i].i = PetscImaginaryPart(pa[i]);
294:         }
295:       }
296:     } else {
297:       if (*m < n) {
298:         PetscCall(PetscFree(*ma));
299:         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300:         *m = n;
301:       }
302:       if (convert) {
303:         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304:         for (PetscCount i = 0; i < n; i++) {
305:           b[i].r = PetscRealPart(pa[i]);
306:           b[i].i = PetscImaginaryPart(pa[i]);
307:         }
308:       }
309:     }
310:   }
311:   #else
312:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314:       if (*m < n) {
315:         PetscCall(PetscFree(*ma));
316:         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317:         *m = n;
318:       }
319:       if (convert) {
320:         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322:       }
323:     } else {
324:       if (*m < n) {
325:         PetscCall(PetscFree(*ma));
326:         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327:         *m = n;
328:       }
329:       if (convert) {
330:         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332:       }
333:     }
334:   }
335:   #endif
336:   else
337: #endif
338:   {
339:     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340:     *ma = (void *)pa;                       // same precision, make them alias
341:     *m  = 0;
342:   }
343:   PetscFunctionReturn(PETSC_SUCCESS);
344: }
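// Illustrative calling pattern (a sketch; the id.redrhs handling in MatMumpsHandleSchur_Private() below follows this shape):
//   PetscCall(PetscMalloc1(len, &mumps->redrhs)); // PetscScalar buffer owned by the PETSc side
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, len, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
// If MumpsScalar and PetscScalar coincide, id.redrhs simply aliases mumps->redrhs and id.redrhs_len stays 0;
// otherwise a separate MumpsScalar buffer of length len is allocated, to be freed later (see MatMumpsFreeInternalID()).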

346: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
347: //
348: // 1) If the two types are different, cast array elements.
349: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351: {
352:   PetscFunctionBegin;
353: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
355:   #if defined(PETSC_USE_COMPLEX)
356:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
357:       PetscReal         *a = (PetscReal *)pa;
358:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360:     } else {
361:       PetscReal         *a = (PetscReal *)pa;
362:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364:     }
365:   #else
366:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
367:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369:     } else {
370:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372:     }
373:   #endif
374:   } else
375: #endif
376:     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381: //
382: // 1) If the two types are different, cast array elements.
383: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385: {
386:   PetscFunctionBegin;
387: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
389:   #if defined(PETSC_USE_COMPLEX)
390:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
391:       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392:       for (PetscCount i = 0; i < n; i++) {
393:         b[i].r = PetscRealPart(pa[i]);
394:         b[i].i = PetscImaginaryPart(pa[i]);
395:       }
396:     } else {
397:       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398:       for (PetscCount i = 0; i < n; i++) {
399:         b[i].r = PetscRealPart(pa[i]);
400:         b[i].i = PetscImaginaryPart(pa[i]);
401:       }
402:     }
403:   #else
404:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
405:       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407:     } else {
408:       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410:     }
411:   #endif
412:   } else
413: #endif
414:     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415:   PetscFunctionReturn(PETSC_SUCCESS);
416: }

418: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419: {
420:   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421: }

423: #define PreMumpsCall(inner, outer, mumpsscalar) \
424:   do { \
425:     inner->job           = outer->job; \
426:     inner->n             = outer->n; \
427:     inner->nblk          = outer->nblk; \
428:     inner->nnz           = outer->nnz; \
429:     inner->irn           = outer->irn; \
430:     inner->jcn           = outer->jcn; \
431:     inner->a             = (mumpsscalar *)outer->a; \
432:     inner->nnz_loc       = outer->nnz_loc; \
433:     inner->irn_loc       = outer->irn_loc; \
434:     inner->jcn_loc       = outer->jcn_loc; \
435:     inner->a_loc         = (mumpsscalar *)outer->a_loc; \
436:     inner->blkptr        = outer->blkptr; \
437:     inner->blkvar        = outer->blkvar; \
438:     inner->perm_in       = outer->perm_in; \
439:     inner->rhs           = (mumpsscalar *)outer->rhs; \
440:     inner->redrhs        = (mumpsscalar *)outer->redrhs; \
441:     inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
442:     inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
443:     inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
444:     inner->irhs_sparse   = outer->irhs_sparse; \
445:     inner->irhs_ptr      = outer->irhs_ptr; \
446:     inner->isol_loc      = outer->isol_loc; \
447:     inner->irhs_loc      = outer->irhs_loc; \
448:     inner->nrhs          = outer->nrhs; \
449:     inner->lrhs          = outer->lrhs; \
450:     inner->lredrhs       = outer->lredrhs; \
451:     inner->nz_rhs        = outer->nz_rhs; \
452:     inner->lsol_loc      = outer->lsol_loc; \
453:     inner->nloc_rhs      = outer->nloc_rhs; \
454:     inner->lrhs_loc      = outer->lrhs_loc; \
455:     inner->schur_lld     = outer->schur_lld; \
456:     inner->size_schur    = outer->size_schur; \
457:     inner->listvar_schur = outer->listvar_schur; \
458:     inner->schur         = (mumpsscalar *)outer->schur; \
459:   } while (0)

461: #define PostMumpsCall(inner, outer) \
462:   do { \
463:     outer->pivnul_list = inner->pivnul_list; \
464:     outer->mapping     = inner->mapping; \
465:   } while (0)

467: // Entry point for PETSc to call MUMPS
468: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469: {
470:   PetscFunctionBegin;
471: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472:   #if defined(PETSC_USE_COMPLEX)
473:   if (outer->precision == PETSC_PRECISION_SINGLE) {
474:     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475:     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476:     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
477:     PostMumpsCall(inner, outer);
478:   } else {
479:     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480:     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481:     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
482:     PostMumpsCall(inner, outer);
483:   }
484:   #else
485:   if (outer->precision == PETSC_PRECISION_SINGLE) {
486:     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487:     PreMumpsCall(inner, outer, SMUMPS_REAL);
488:     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
489:     PostMumpsCall(inner, outer);
490:   } else {
491:     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492:     PreMumpsCall(inner, outer, DMUMPS_REAL);
493:     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
494:     PostMumpsCall(inner, outer);
495:   }
496:   #endif
497: #else
498:   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499:   PreMumpsCall(inner, outer, MumpsScalar);
500:   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501:   PostMumpsCall(inner, outer);
502: #endif
503:   PetscFunctionReturn(PETSC_SUCCESS);
504: }

506: /* macros s.t. indices match MUMPS documentation */
507: #define ICNTL(I) icntl[(I) - 1]
508: #define INFOG(I) infog[(I) - 1]
509: #define INFO(I)  info[(I) - 1]

511: // Get a value from a MumpsScalar array, which is the <F> field in a MUMPS_STRUC_C struct. The value is convertible to PetscScalar. Note no minus 1 on I!
512: #if defined(PETSC_USE_COMPLEX)
513:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514: #else
515:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516: #endif

518: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519: #define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521: #define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

523: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
524: #define ID_CNTL_SET(ID, I, VAL) \
525:   do { \
526:     if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527:     else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528:   } while (0)
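// For example, reading a global statistic and setting a control parameter might look like (a sketch):
//   PetscReal rinfog1 = ID_RINFOG_GET(mumps->id, 1); // RINFOG(1), widened to PetscReal from whatever precision MUMPS runs in
//   ID_CNTL_SET(mumps->id, 1, 0.01);                 // CNTL(1) = 0.01, stored as float or double to match id.precision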

530: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs that the master ranks can run on */
531: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532:   #define PetscMUMPS_c(mumps) \
533:     do { \
534:       if (mumps->use_petsc_omp_support) { \
535:         if (mumps->is_omp_master) { \
536:           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538:           PetscCall(PetscCallMumps_Private(&mumps->id)); \
539:           PetscCall(PetscFPTrapPop()); \
540:           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541:         } \
542:         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543:         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
544:          to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
545:          an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82. \
546:          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
547:       */ \
548:         MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549:         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550:         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551:         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552:         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553:       } else { \
554:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555:         PetscCall(PetscCallMumps_Private(&mumps->id)); \
556:         PetscCall(PetscFPTrapPop()); \
557:       } \
558:     } while (0)
559: #else
560:   #define PetscMUMPS_c(mumps) \
561:     do { \
562:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563:       PetscCall(PetscCallMumps_Private(&mumps->id)); \
564:       PetscCall(PetscFPTrapPop()); \
565:     } while (0)
566: #endif
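// Typical call sequence in the factorization and solve routines below (a sketch):
//   mumps->id.job = JOB_FACTNUMERIC;
//   PetscMUMPS_c(mumps);
//   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
// PetscCallMumps_Private() copies the scalar-independent fields into the inner [SDCZ]MUMPS_STRUC_C before the call
// and copies the MUMPS-allocated output pointers (pivnul_list, mapping) back into the outer struct afterwards.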

568: typedef struct Mat_MUMPS Mat_MUMPS;
569: struct Mat_MUMPS {
570:   XMUMPS_STRUC_C id;

572:   MatStructure   matstruc;
573:   PetscMPIInt    myid, petsc_size;
574:   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
575:   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
576:   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
577:   PetscMUMPSInt  sym;
578:   MPI_Comm       mumps_comm;
579:   PetscMUMPSInt *ICNTL_pre;
580:   PetscReal     *CNTL_pre;
581:   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
582:   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
583:   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
584:   PetscMUMPSInt  ICNTL26;
585:   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
586: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587:   PetscInt    *rhs_nrow, max_nrhs;
588:   PetscMPIInt *rhs_recvcounts, *rhs_disps;
589:   PetscScalar *rhs_loc, *rhs_recvbuf;
590: #endif
591:   Vec            b_seq, x_seq;
592:   PetscInt       ninfo, *info; /* which INFO to display */
593:   PetscInt       sizeredrhs;
594:   PetscScalar   *schur_sol;
595:   PetscInt       schur_sizesol;
596:   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
597:   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598:   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
599:   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

601:   /* Support for MATNEST */
602:   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603:   PetscCount  *nest_vals_start;
604:   PetscScalar *nest_vals;

606:   /* stuff used by petsc/mumps OpenMP support */
607:   PetscBool    use_petsc_omp_support;
608:   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller ensuring that blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
609:   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610:   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
611:   PetscMPIInt  tag, omp_comm_size;
612:   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
613:   MPI_Request *reqs;
614: };

616: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617:    Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
618:  */
619: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620: {
621:   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

623:   PetscFunctionBegin;
624: #if defined(PETSC_USE_64BIT_INDICES)
625:   {
626:     PetscInt i;
627:     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
628:       PetscCall(PetscFree(mumps->ia_alloc));
629:       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
630:       mumps->cur_ilen = nrow + 1;
631:     }
632:     if (nnz > mumps->cur_jlen) {
633:       PetscCall(PetscFree(mumps->ja_alloc));
634:       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
635:       mumps->cur_jlen = nnz;
636:     }
637:     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
638:     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
639:     *ia_mumps = mumps->ia_alloc;
640:     *ja_mumps = mumps->ja_alloc;
641:   }
642: #else
643:   *ia_mumps = ia;
644:   *ja_mumps = ja;
645: #endif
646:   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }

650: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651: {
652:   PetscFunctionBegin;
653:   PetscCall(PetscFree(mumps->id.listvar_schur));
654:   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655:   PetscCall(PetscFree(mumps->schur_sol));
656:   mumps->id.size_schur = 0;
657:   mumps->id.schur_lld  = 0;
658:   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes the inner id is not yet built
659:   PetscFunctionReturn(PETSC_SUCCESS);
660: }

662: /* solve with rhs in mumps->id.redrhs and return in the same location */
663: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
664: {
665:   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
666:   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
667:   MatFactorSchurStatus schurstatus;
668:   PetscInt             sizesol;
669:   const PetscScalar   *xarray;

671:   PetscFunctionBegin;
672:   PetscCall(MatFactorFactorizeSchurComplement(F));
673:   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
674:   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

676:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
677:   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
678: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
679:   PetscCall(MatBindToCPU(B, S->boundtocpu));
680: #endif
681:   switch (schurstatus) {
682:   case MAT_FACTOR_SCHUR_FACTORED:
683:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
684:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
685: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
686:     PetscCall(MatBindToCPU(X, S->boundtocpu));
687: #endif
688:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
689:       PetscCall(MatMatSolveTranspose(S, B, X));
690:     } else {
691:       PetscCall(MatMatSolve(S, B, X));
692:     }
693:     break;
694:   case MAT_FACTOR_SCHUR_INVERTED:
695:     sizesol = mumps->id.nrhs * mumps->id.size_schur;
696:     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
697:       PetscCall(PetscFree(mumps->schur_sol));
698:       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
699:       mumps->schur_sizesol = sizesol;
700:     }
701:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
702:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
703: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
704:     PetscCall(MatBindToCPU(X, S->boundtocpu));
705: #endif
706:     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
707:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
708:       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
709:     } else {
710:       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
711:     }
712:     PetscCall(MatProductSetFromOptions(X));
713:     PetscCall(MatProductSymbolic(X));
714:     PetscCall(MatProductNumeric(X));

716:     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
717:     break;
718:   default:
719:     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
720:   }
721:   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
722:   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
723:   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
724:   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
725:   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
726:   PetscCall(MatDestroy(&B));
727:   PetscCall(MatDestroy(&X));
728:   PetscFunctionReturn(PETSC_SUCCESS);
729: }

731: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
732: {
733:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

735:   PetscFunctionBegin;
736:   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
737:     PetscFunctionReturn(PETSC_SUCCESS);
738:   }
739:   if (!expansion) { /* prepare for the condensation step */
740:     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
741:     /* allocate MUMPS internal array to store reduced right-hand sides */
742:     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
743:       mumps->id.lredrhs = mumps->id.size_schur;
744:       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
745:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
746:       PetscCall(PetscFree(mumps->redrhs));
747:       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
748:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
749:     }
750:   } else {                                    /* prepare for the expansion step */
751:     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
752:     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
753:     PetscMUMPS_c(mumps);
754:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
755:     /* restore defaults */
756:     mumps->id.ICNTL(26) = -1;
757:     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
758:     if (mumps->id.nrhs > 1) {
759:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
760:       PetscCall(PetscFree(mumps->redrhs));
761:       mumps->id.redrhs_len = 0;
762:       mumps->id.lredrhs    = 0;
763:       mumps->sizeredrhs    = 0;
764:     }
765:   }
766:   PetscFunctionReturn(PETSC_SUCCESS);
767: }

769: /*
770:   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

772:   input:
773:     A       - matrix in aij,baij or sbaij format
774:     shift   - 0: C style output triple; 1: Fortran style output triple.
775:     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776:               MAT_REUSE_MATRIX:   only the values in v array are updated
777:   output:
778:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779:     r, c, v - row and col index, matrix values (matrix triples)

781:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
782:   freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
783:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

785:  */
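/*
   A small worked example of the triples format: with shift = 1 (Fortran-style 1-based indexing, as MUMPS expects for assembled input),
   the 2x2 SeqAIJ matrix
       [ 4  1 ]
       [ 0  3 ]
   gives nnz = 3 and the triples
       r = {1, 1, 2},  c = {1, 2, 2},  v = {4, 1, 3}.
 */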

787: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788: {
789:   const PetscScalar *av;
790:   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791:   PetscCount         nz, rnz, k;
792:   PetscMUMPSInt     *row, *col;
793:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

795:   PetscFunctionBegin;
796:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797:   if (reuse == MAT_INITIAL_MATRIX) {
798:     nz = aa->nz;
799:     ai = aa->i;
800:     aj = aa->j;
801:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802:     for (PetscCount i = k = 0; i < M; i++) {
803:       rnz = ai[i + 1] - ai[i];
804:       ajj = aj + ai[i];
805:       for (PetscCount j = 0; j < rnz; j++) {
806:         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808:         k++;
809:       }
810:     }
811:     mumps->val = (PetscScalar *)av;
812:     mumps->irn = row;
813:     mumps->jcn = col;
814:     mumps->nnz = nz;
815:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816:   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818:   PetscFunctionReturn(PETSC_SUCCESS);
819: }

821: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
822: {
823:   PetscCount     nz, i, j, k, r;
824:   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
825:   PetscMUMPSInt *row, *col;

827:   PetscFunctionBegin;
828:   nz = a->sliidx[a->totalslices];
829:   if (reuse == MAT_INITIAL_MATRIX) {
830:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
831:     for (i = k = 0; i < a->totalslices; i++) {
832:       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
833:     }
834:     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
835:     mumps->irn = row;
836:     mumps->jcn = col;
837:     mumps->nnz = nz;
838:     mumps->val = a->val;
839:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
840:   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845: {
846:   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
847:   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848:   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849:   PetscInt        bs;
850:   PetscMUMPSInt  *row, *col;

852:   PetscFunctionBegin;
853:   if (reuse == MAT_INITIAL_MATRIX) {
854:     PetscCall(MatGetBlockSize(A, &bs));
855:     M  = A->rmap->N / bs;
856:     ai = aa->i;
857:     aj = aa->j;
858:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
859:     for (i = 0; i < M; i++) {
860:       ajj = aj + ai[i];
861:       rnz = ai[i + 1] - ai[i];
862:       for (k = 0; k < rnz; k++) {
863:         for (j = 0; j < bs; j++) {
864:           for (m = 0; m < bs; m++) {
865:             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866:             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867:             idx++;
868:           }
869:         }
870:       }
871:     }
872:     mumps->irn = row;
873:     mumps->jcn = col;
874:     mumps->nnz = nz;
875:     mumps->val = aa->a;
876:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877:   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878:   PetscFunctionReturn(PETSC_SUCCESS);
879: }

881: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882: {
883:   const PetscInt *ai, *aj, *ajj;
884:   PetscInt        bs;
885:   PetscCount      nz, rnz, i, j, k, m;
886:   PetscMUMPSInt  *row, *col;
887:   PetscScalar    *val;
888:   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889:   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890: #if defined(PETSC_USE_COMPLEX)
891:   PetscBool isset, hermitian;
892: #endif

894:   PetscFunctionBegin;
895: #if defined(PETSC_USE_COMPLEX)
896:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898: #endif
899:   ai = aa->i;
900:   aj = aa->j;
901:   PetscCall(MatGetBlockSize(A, &bs));
902:   if (reuse == MAT_INITIAL_MATRIX) {
903:     const PetscCount alloc_size = aa->nz * bs2;

905:     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906:     if (bs > 1) {
907:       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908:       mumps->val = mumps->val_alloc;
909:     } else {
910:       mumps->val = aa->a;
911:     }
912:     mumps->irn = row;
913:     mumps->jcn = col;
914:   } else {
915:     row = mumps->irn;
916:     col = mumps->jcn;
917:   }
918:   val = mumps->val;

920:   nz = 0;
921:   if (bs > 1) {
922:     for (i = 0; i < mbs; i++) {
923:       rnz = ai[i + 1] - ai[i];
924:       ajj = aj + ai[i];
925:       for (j = 0; j < rnz; j++) {
926:         for (k = 0; k < bs; k++) {
927:           for (m = 0; m < bs; m++) {
928:             if (ajj[j] > i || k >= m) {
929:               if (reuse == MAT_INITIAL_MATRIX) {
930:                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931:                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932:               }
933:               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934:             }
935:           }
936:         }
937:       }
938:     }
939:   } else if (reuse == MAT_INITIAL_MATRIX) {
940:     for (i = 0; i < mbs; i++) {
941:       rnz = ai[i + 1] - ai[i];
942:       ajj = aj + ai[i];
943:       for (j = 0; j < rnz; j++) {
944:         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946:         nz++;
947:       }
948:     }
949:     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950:   } else if (mumps->nest_vals)
951:     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952:   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954:   PetscFunctionReturn(PETSC_SUCCESS);
955: }

957: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958: {
959:   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960:   PetscCount         nz, rnz, i, j;
961:   const PetscScalar *av, *v1;
962:   PetscScalar       *val;
963:   PetscMUMPSInt     *row, *col;
964:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965:   PetscBool          diagDense;
966: #if defined(PETSC_USE_COMPLEX)
967:   PetscBool hermitian, isset;
968: #endif

970:   PetscFunctionBegin;
971: #if defined(PETSC_USE_COMPLEX)
972:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
974: #endif
975:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976:   ai = aa->i;
977:   aj = aa->j;
978:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979:   if (reuse == MAT_INITIAL_MATRIX) {
980:     /* count nz in the upper triangular part of A */
981:     nz = 0;
982:     if (!diagDense) {
983:       for (i = 0; i < M; i++) {
984:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985:           for (j = ai[i]; j < ai[i + 1]; j++) {
986:             if (aj[j] < i) continue;
987:             nz++;
988:           }
989:         } else {
990:           nz += ai[i + 1] - adiag[i];
991:         }
992:       }
993:     } else {
994:       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995:     }
996:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997:     PetscCall(PetscMalloc1(nz, &val));
998:     mumps->nnz = nz;
999:     mumps->irn = row;
1000:     mumps->jcn = col;
1001:     mumps->val = mumps->val_alloc = val;

1003:     nz = 0;
1004:     if (!diagDense) {
1005:       for (i = 0; i < M; i++) {
1006:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007:           for (j = ai[i]; j < ai[i + 1]; j++) {
1008:             if (aj[j] < i) continue;
1009:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010:             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011:             val[nz] = av[j];
1012:             nz++;
1013:           }
1014:         } else {
1015:           rnz = ai[i + 1] - adiag[i];
1016:           ajj = aj + adiag[i];
1017:           v1  = av + adiag[i];
1018:           for (j = 0; j < rnz; j++) {
1019:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020:             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021:             val[nz++] = v1[j];
1022:           }
1023:         }
1024:       }
1025:     } else {
1026:       for (i = 0; i < M; i++) {
1027:         rnz = ai[i + 1] - adiag[i];
1028:         ajj = aj + adiag[i];
1029:         v1  = av + adiag[i];
1030:         for (j = 0; j < rnz; j++) {
1031:           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032:           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033:           val[nz++] = v1[j];
1034:         }
1035:       }
1036:     }
1037:   } else {
1038:     nz  = 0;
1039:     val = mumps->val;
1040:     if (!diagDense) {
1041:       for (i = 0; i < M; i++) {
1042:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043:           for (j = ai[i]; j < ai[i + 1]; j++) {
1044:             if (aj[j] < i) continue;
1045:             val[nz++] = av[j];
1046:           }
1047:         } else {
1048:           rnz = ai[i + 1] - adiag[i];
1049:           v1  = av + adiag[i];
1050:           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051:         }
1052:       }
1053:     } else {
1054:       for (i = 0; i < M; i++) {
1055:         rnz = ai[i + 1] - adiag[i];
1056:         v1  = av + adiag[i];
1057:         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058:       }
1059:     }
1060:   }
1061:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062:   PetscFunctionReturn(PETSC_SUCCESS);
1063: }

1065: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066: {
1067:   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068:   PetscInt           bs;
1069:   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070:   PetscMUMPSInt     *row, *col;
1071:   const PetscScalar *av, *bv, *v1, *v2;
1072:   PetscScalar       *val;
1073:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076:   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077: #if defined(PETSC_USE_COMPLEX)
1078:   PetscBool hermitian, isset;
1079: #endif

1081:   PetscFunctionBegin;
1082: #if defined(PETSC_USE_COMPLEX)
1083:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1085: #endif
1086:   PetscCall(MatGetBlockSize(A, &bs));
1087:   rstart = A->rmap->rstart;
1088:   ai     = aa->i;
1089:   aj     = aa->j;
1090:   bi     = bb->i;
1091:   bj     = bb->j;
1092:   av     = aa->a;
1093:   bv     = bb->a;

1095:   garray = mat->garray;

1097:   if (reuse == MAT_INITIAL_MATRIX) {
1098:     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100:     PetscCall(PetscMalloc1(nz, &val));
1101:     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
1102:     mumps->irn = row;
1103:     mumps->jcn = col;
1104:     mumps->val = mumps->val_alloc = val;
1105:   } else {
1106:     val = mumps->val;
1107:   }

1109:   jj   = 0;
1110:   irow = rstart;
1111:   for (i = 0; i < mbs; i++) {
1112:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113:     countA = ai[i + 1] - ai[i];
1114:     countB = bi[i + 1] - bi[i];
1115:     bjj    = bj + bi[i];
1116:     v1     = av + ai[i] * bs2;
1117:     v2     = bv + bi[i] * bs2;

1119:     if (bs > 1) {
1120:       /* A-part */
1121:       for (j = 0; j < countA; j++) {
1122:         for (k = 0; k < bs; k++) {
1123:           for (m = 0; m < bs; m++) {
1124:             if (rstart + ajj[j] * bs > irow || k >= m) {
1125:               if (reuse == MAT_INITIAL_MATRIX) {
1126:                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127:                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128:               }
1129:               val[jj++] = v1[j * bs2 + m + k * bs];
1130:             }
1131:           }
1132:         }
1133:       }

1135:       /* B-part */
1136:       for (j = 0; j < countB; j++) {
1137:         for (k = 0; k < bs; k++) {
1138:           for (m = 0; m < bs; m++) {
1139:             if (reuse == MAT_INITIAL_MATRIX) {
1140:               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141:               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142:             }
1143:             val[jj++] = v2[j * bs2 + m + k * bs];
1144:           }
1145:         }
1146:       }
1147:     } else {
1148:       /* A-part */
1149:       for (j = 0; j < countA; j++) {
1150:         if (reuse == MAT_INITIAL_MATRIX) {
1151:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152:           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153:         }
1154:         val[jj++] = v1[j];
1155:       }

1157:       /* B-part */
1158:       for (j = 0; j < countB; j++) {
1159:         if (reuse == MAT_INITIAL_MATRIX) {
1160:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162:         }
1163:         val[jj++] = v2[j];
1164:       }
1165:     }
1166:     irow += bs;
1167:   }
1168:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169:   PetscFunctionReturn(PETSC_SUCCESS);
1170: }

1172: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173: {
1174:   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175:   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176:   PetscMUMPSInt     *row, *col;
1177:   const PetscScalar *av, *bv, *v1, *v2;
1178:   PetscScalar       *val;
1179:   Mat                Ad, Ao;
1180:   Mat_SeqAIJ        *aa;
1181:   Mat_SeqAIJ        *bb;

1183:   PetscFunctionBegin;
1184:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1188:   aa = (Mat_SeqAIJ *)Ad->data;
1189:   bb = (Mat_SeqAIJ *)Ao->data;
1190:   ai = aa->i;
1191:   aj = aa->j;
1192:   bi = bb->i;
1193:   bj = bb->j;

1195:   rstart = A->rmap->rstart;
1196:   cstart = A->cmap->rstart;

1198:   if (reuse == MAT_INITIAL_MATRIX) {
1199:     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201:     PetscCall(PetscMalloc1(nz, &val));
1202:     mumps->nnz = nz;
1203:     mumps->irn = row;
1204:     mumps->jcn = col;
1205:     mumps->val = mumps->val_alloc = val;
1206:   } else {
1207:     val = mumps->val;
1208:   }

1210:   jj   = 0;
1211:   irow = rstart;
1212:   for (i = 0; i < m; i++) {
1213:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1214:     countA = ai[i + 1] - ai[i];
1215:     countB = bi[i + 1] - bi[i];
1216:     bjj    = bj + bi[i];
1217:     v1     = av + ai[i];
1218:     v2     = bv + bi[i];

1220:     /* A-part */
1221:     for (j = 0; j < countA; j++) {
1222:       if (reuse == MAT_INITIAL_MATRIX) {
1223:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224:         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225:       }
1226:       val[jj++] = v1[j];
1227:     }

1229:     /* B-part */
1230:     for (j = 0; j < countB; j++) {
1231:       if (reuse == MAT_INITIAL_MATRIX) {
1232:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233:         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234:       }
1235:       val[jj++] = v2[j];
1236:     }
1237:     irow++;
1238:   }
1239:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241:   PetscFunctionReturn(PETSC_SUCCESS);
1242: }

1244: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1245: {
1246:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1247:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1248:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1249:   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1250:   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1251:   const PetscInt     bs2 = mat->bs2;
1252:   PetscInt           bs;
1253:   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1254:   PetscMUMPSInt     *row, *col;
1255:   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1256:   PetscScalar       *val;

1258:   PetscFunctionBegin;
1259:   PetscCall(MatGetBlockSize(A, &bs));
1260:   if (reuse == MAT_INITIAL_MATRIX) {
1261:     nz = bs2 * (aa->nz + bb->nz);
1262:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1263:     PetscCall(PetscMalloc1(nz, &val));
1264:     mumps->nnz = nz;
1265:     mumps->irn = row;
1266:     mumps->jcn = col;
1267:     mumps->val = mumps->val_alloc = val;
1268:   } else {
1269:     val = mumps->val;
1270:   }

1272:   jj   = 0;
1273:   irow = rstart;
1274:   for (i = 0; i < mbs; i++) {
1275:     countA = ai[i + 1] - ai[i];
1276:     countB = bi[i + 1] - bi[i];
1277:     ajj    = aj + ai[i];
1278:     bjj    = bj + bi[i];
1279:     v1     = av + bs2 * ai[i];
1280:     v2     = bv + bs2 * bi[i];

1282:     idx = 0;
1283:     /* A-part */
1284:     for (k = 0; k < countA; k++) {
1285:       for (j = 0; j < bs; j++) {
1286:         for (n = 0; n < bs; n++) {
1287:           if (reuse == MAT_INITIAL_MATRIX) {
1288:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1289:             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1290:           }
1291:           val[jj++] = v1[idx++];
1292:         }
1293:       }
1294:     }

1296:     idx = 0;
1297:     /* B-part */
1298:     for (k = 0; k < countB; k++) {
1299:       for (j = 0; j < bs; j++) {
1300:         for (n = 0; n < bs; n++) {
1301:           if (reuse == MAT_INITIAL_MATRIX) {
1302:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1303:             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1304:           }
1305:           val[jj++] = v2[idx++];
1306:         }
1307:       }
1308:     }
1309:     irow += bs;
1310:   }
1311:   PetscFunctionReturn(PETSC_SUCCESS);
1312: }

1314: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315: {
1316:   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317:   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318:   PetscMUMPSInt     *row, *col;
1319:   const PetscScalar *av, *bv, *v1, *v2;
1320:   PetscScalar       *val;
1321:   Mat                Ad, Ao;
1322:   Mat_SeqAIJ        *aa;
1323:   Mat_SeqAIJ        *bb;
1324: #if defined(PETSC_USE_COMPLEX)
1325:   PetscBool hermitian, isset;
1326: #endif

1328:   PetscFunctionBegin;
1329: #if defined(PETSC_USE_COMPLEX)
1330:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian matrices for Cholesky");
1332: #endif
1333:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1337:   aa = (Mat_SeqAIJ *)Ad->data;
1338:   bb = (Mat_SeqAIJ *)Ao->data;
1339:   ai = aa->i;
1340:   aj = aa->j;
1341:   bi = bb->i;
1342:   bj = bb->j;
1343:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344:   rstart = A->rmap->rstart;

1346:   if (reuse == MAT_INITIAL_MATRIX) {
1347:     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1348:     nzb = 0; /* num of upper triangular entries in mat->B */
1349:     for (i = 0; i < m; i++) {
1350:       nza += (ai[i + 1] - adiag[i]);
1351:       countB = bi[i + 1] - bi[i];
1352:       bjj    = bj + bi[i];
1353:       for (j = 0; j < countB; j++) {
1354:         if (garray[bjj[j]] > rstart) nzb++;
1355:       }
1356:     }

1358:     nz = nza + nzb; /* total nz of upper triangular part of mat */
1359:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360:     PetscCall(PetscMalloc1(nz, &val));
1361:     mumps->nnz = nz;
1362:     mumps->irn = row;
1363:     mumps->jcn = col;
1364:     mumps->val = mumps->val_alloc = val;
1365:   } else {
1366:     val = mumps->val;
1367:   }

1369:   jj   = 0;
1370:   irow = rstart;
1371:   for (i = 0; i < m; i++) {
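          /* Only entries on or above the diagonal are emitted: the Ad part starts at the diagonal marker
             adiag[i], and Ao entries are kept only when garray[bjj[j]] > rstart, which (since garray values
             lie outside this rank's diagonal block) selects columns to the right of it. For a symmetric
             factorization MUMPS expects only one triangle of the matrix. */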
1372:     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373:     v1     = av + adiag[i];
1374:     countA = ai[i + 1] - adiag[i];
1375:     countB = bi[i + 1] - bi[i];
1376:     bjj    = bj + bi[i];
1377:     v2     = bv + bi[i];

1379:     /* A-part */
1380:     for (j = 0; j < countA; j++) {
1381:       if (reuse == MAT_INITIAL_MATRIX) {
1382:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383:         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384:       }
1385:       val[jj++] = v1[j];
1386:     }

1388:     /* B-part */
1389:     for (j = 0; j < countB; j++) {
1390:       if (garray[bjj[j]] > rstart) {
1391:         if (reuse == MAT_INITIAL_MATRIX) {
1392:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394:         }
1395:         val[jj++] = v2[j];
1396:       }
1397:     }
1398:     irow++;
1399:   }
1400:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402:   PetscFunctionReturn(PETSC_SUCCESS);
1403: }

1405: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406: {
1407:   const PetscScalar *av;
1408:   const PetscInt     M = A->rmap->n;
1409:   PetscCount         i;
1410:   PetscMUMPSInt     *row, *col;
1411:   Vec                v;

1413:   PetscFunctionBegin;
1414:   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415:   PetscCall(VecGetArrayRead(v, &av));
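        /* A MATDIAGONAL block contributes exactly one triple (rstart + i, rstart + i, d_i) per local row,
           and on first use the diagonal array av is referenced directly instead of being copied. */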
1416:   if (reuse == MAT_INITIAL_MATRIX) {
1417:     PetscCall(PetscMalloc2(M, &row, M, &col));
1418:     for (i = 0; i < M; i++) {
1419:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420:       col[i] = row[i];
1421:     }
1422:     mumps->val = (PetscScalar *)av;
1423:     mumps->irn = row;
1424:     mumps->jcn = col;
1425:     mumps->nnz = M;
1426:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427:   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428:   PetscCall(VecRestoreArrayRead(v, &av));
1429:   PetscFunctionReturn(PETSC_SUCCESS);
1430: }

1432: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1433: {
1434:   PetscScalar   *v;
1435:   const PetscInt m = A->rmap->n, N = A->cmap->N;
1436:   PetscInt       lda;
1437:   PetscCount     i, j;
1438:   PetscMUMPSInt *row, *col;

1440:   PetscFunctionBegin;
1441:   PetscCall(MatDenseGetArray(A, &v));
1442:   PetscCall(MatDenseGetLDA(A, &lda));
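        /* The dense block is emitted column by column: local entry (i, j), stored at v[i + lda*j], becomes
           the triple (rstart + i, j, value); when the leading dimension lda differs from the local row count
           m, the values are first compacted into a contiguous buffer of length m*N. */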
1443:   if (reuse == MAT_INITIAL_MATRIX) {
1444:     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1445:     for (i = 0; i < m; i++) {
1446:       col[i] = 0;
1447:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1448:     }
1449:     for (j = 1; j < N; j++) {
1450:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1451:       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1452:     }
1453:     if (lda == m) mumps->val = v;
1454:     else {
1455:       PetscCall(PetscMalloc1(m * N, &mumps->val));
1456:       mumps->val_alloc = mumps->val;
1457:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1458:     }
1459:     mumps->irn = row;
1460:     mumps->jcn = col;
1461:     mumps->nnz = m * N;
1462:   } else {
1463:     if (lda == m && !mumps->nest_vals) mumps->val = v;
1464:     else {
1465:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1466:     }
1467:   }
1468:   PetscCall(MatDenseRestoreArray(A, &v));
1469:   PetscFunctionReturn(PETSC_SUCCESS);
1470: }

1472: // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function walks up to the parent Mat until that parent is itself
1473: // neither a MATTRANSPOSEVIRTUAL nor a MATHERMITIANTRANSPOSEVIRTUAL, and returns the accumulated shift and scaling, whether the parent Mat should be conjugated,
1474: // and whether its rows and columns should be swapped
1475: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1476: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477: {
1478:   Mat         A;
1479:   PetscScalar s[2];
1480:   PetscBool   isTrans, isHTrans, compare;

1482:   PetscFunctionBegin;
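        /* Peel off one (Hermitian-)transpose wrapper per iteration: each layer toggles swap (and, for the
           Hermitian case, conjugate), and its MatShell shift/scale pair (s0, s1) is folded into the running
           pair so that, roughly, sub == vscale*op(parent) + vshift*I once the loop ends; callers reject a
           nonzero accumulated vshift. */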
1483:   do {
1484:     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485:     if (isTrans) {
1486:       PetscCall(MatTransposeGetMat(*sub, &A));
1487:       isHTrans = PETSC_FALSE;
1488:     } else {
1489:       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490:       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491:     }
1492:     compare = (PetscBool)(isTrans || isHTrans);
1493:     if (compare) {
1494:       if (vshift && vscale) {
1495:         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496:         if (!*conjugate) {
1497:           *vshift += s[0] * *vscale;
1498:           *vscale *= s[1];
1499:         } else {
1500:           *vshift += PetscConj(s[0]) * *vscale;
1501:           *vscale *= PetscConj(s[1]);
1502:         }
1503:       }
1504:       if (swap) *swap = (PetscBool)!*swap;
1505:       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506:       *sub = A;
1507:     }
1508:   } while (compare);
1509:   PetscFunctionReturn(PETSC_SUCCESS);
1510: }

1512: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1513: {
1514:   Mat     **mats;
1515:   PetscInt  nr, nc;
1516:   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

1518:   PetscFunctionBegin;
1519:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1520:   if (reuse == MAT_INITIAL_MATRIX) {
1521:     PetscMUMPSInt *irns, *jcns;
1522:     PetscScalar   *vals;
1523:     PetscCount     totnnz, cumnnz, maxnnz;
1524:     PetscInt      *pjcns_w, Mbs = 0;
1525:     IS            *rows, *cols;
1526:     PetscInt     **rows_idx, **cols_idx;

1528:     cumnnz = 0;
1529:     maxnnz = 0;
1530:     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
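          /* Two passes over the nest: this first pass picks a conversion routine per block and estimates the
             total number of triples; the second pass converts each block with a zero shift, remaps its
             row/column indices to the global (nest) numbering, and records in nest_vals_start[] where each
             block's values begin in the concatenated COO arrays so that MAT_REUSE_MATRIX can refresh them in
             place. */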
1531:     for (PetscInt r = 0; r < nr; r++) {
1532:       for (PetscInt c = 0; c < nc; c++) {
1533:         Mat sub = mats[r][c];

1535:         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1536:         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1537:         if (sub) {
1538:           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1539:           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1540:           MatInfo   info;

1542:           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1543:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1544:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1545:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1546:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1547:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1548:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1549:           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1550:           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

1552:           if (chol) {
1553:             if (r == c) {
1554:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1555:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1556:               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1557:               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1558:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560:             } else {
1561:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1562:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1563:               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1564:               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1565:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1566:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1567:             }
1568:           } else {
1569:             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1570:             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1571:             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1572:             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1573:             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1574:             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1575:           }
1576:           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1577:           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1578:           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1579:           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1580:           maxnnz = PetscMax(maxnnz, info.nz_used);
1581:         }
1582:       }
1583:     }

1585:     /* Allocate total COO */
1586:     totnnz = cumnnz;
1587:     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1588:     PetscCall(PetscMalloc1(totnnz, &vals));

1590:     /* Handle row and column maps
1591:        We directly map rows and use an SF for the columns */
1592:     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1593:     PetscCall(MatNestGetISs(A, rows, cols));
1594:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1595:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1596:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1597:     else (void)maxnnz;

1599:     cumnnz = 0;
1600:     for (PetscInt r = 0; r < nr; r++) {
1601:       for (PetscInt c = 0; c < nc; c++) {
1602:         Mat             sub    = mats[r][c];
1603:         const PetscInt *ridx   = rows_idx[r];
1604:         const PetscInt *cidx   = cols_idx[c];
1605:         PetscScalar     vscale = 1.0, vshift = 0.0;
1606:         PetscInt        rst, size, bs;
1607:         PetscSF         csf;
1608:         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1609:         PetscLayout     cmap;
1610:         PetscInt        innz;

1612:         mumps->nest_vals_start[r * nc + c] = cumnnz;
1613:         if (c == r) {
1614:           PetscCall(ISGetSize(rows[r], &size));
1615:           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1616:             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1617:           }
1618:           PetscCall(MatGetBlockSize(sub, &bs));
1619:           Mbs += size / bs;
1620:         }
1621:         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

1623:         /* Extract inner blocks if needed */
1624:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1625:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

1627:         /* Get column layout to map off-process columns */
1628:         PetscCall(MatGetLayouts(sub, NULL, &cmap));

1630:         /* Get row start to map on-process rows */
1631:         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

1633:         /* Directly use the MUMPS data structure and C (0-based) ordering for now */
1634:         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

1636:         /* Swap the role of rows and columns indices for transposed blocks
1637:            since we need values with global final ordering */
1638:         if (swap) {
1639:           cidx = rows_idx[r];
1640:           ridx = cols_idx[c];
1641:         }

1643:         /* Communicate column indices
1644:            This could have been done with a single SF, but that would have complicated the code considerably.
1645:            Since this is done only once, we accept the cost of setting up an SF for each block */
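              /* For illustration: after the block conversion, mumps->jcn[k] holds a global column index of
                 sub; the broadcast below replaces it with the corresponding entry of cols_idx[c] fetched
                 from the owning rank, i.e. the column index of the whole nest that this block column maps
                 to. */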
1646:         if (PetscDefined(USE_64BIT_INDICES)) {
1647:           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1648:         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1649:         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1650:         PetscCall(PetscIntCast(mumps->nnz, &innz));
1651:         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1652:         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653:         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1654:         PetscCall(PetscSFDestroy(&csf));

1656:         /* Import indices: use direct map for rows and mapped indices for columns */
1657:         if (swap) {
1658:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1659:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1660:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1661:           }
1662:         } else {
1663:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1664:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1665:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1666:           }
1667:         }

1669:         /* Import values to full COO */
1670:         if (conjugate) { /* conjugate the entries */
1671:           PetscScalar *v = vals + cumnnz;
1672:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1673:         } else if (vscale != 1.0) {
1674:           PetscScalar *v = vals + cumnnz;
1675:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1676:         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

1678:         /* Shift new starting point and sanity check */
1679:         cumnnz += mumps->nnz;
1680:         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

1682:         /* Free scratch memory */
1683:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1684:         PetscCall(PetscFree(mumps->val_alloc));
1685:         mumps->val = NULL;
1686:         mumps->nnz = 0;
1687:       }
1688:     }
1689:     if (mumps->id.ICNTL(15) == 1) {
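            /* ICNTL(15) = 1 asks MUMPS to exploit a block format of the matrix (see the MUMPS users manual):
               nblk and the 1-based array blkptr, built on rank 0 only, describe blocks of bs consecutive
               variables taken from each diagonal sub-matrix, rank by rank. When every block would have size
               1 (Mbs equal to the global size), the feature buys nothing and is switched off instead. */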
1690:       if (Mbs != A->rmap->N) {
1691:         PetscMPIInt rank, size;

1693:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1694:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1695:         if (rank == 0) {
1696:           PetscInt shift = 0;

1698:           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1699:           PetscCall(PetscFree(mumps->id.blkptr));
1700:           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1701:           mumps->id.blkptr[0] = 1;
1702:           for (PetscInt i = 0; i < size; ++i) {
1703:             for (PetscInt r = 0; r < nr; r++) {
1704:               Mat             sub = mats[r][r];
1705:               const PetscInt *ranges;
1706:               PetscInt        bs;

1708:               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1709:               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1710:               PetscCall(MatGetBlockSize(sub, &bs));
1711:               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1712:               shift += (ranges[i + 1] - ranges[i]) / bs;
1713:             }
1714:           }
1715:         }
1716:       } else mumps->id.ICNTL(15) = 0;
1717:     }
1718:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1719:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1720:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1721:     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1722:     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1723:     mumps->nest_vals_start[nr * nc] = cumnnz;

1725:     /* Set pointers for final MUMPS data structure */
1726:     mumps->nest_vals = vals;
1727:     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated in the OpenMP code path */
1728:     mumps->val       = vals;
1729:     mumps->irn       = irns;
1730:     mumps->jcn       = jcns;
1731:     mumps->nnz       = cumnnz;
1732:   } else {
1733:     PetscScalar *oval = mumps->nest_vals;
1734:     for (PetscInt r = 0; r < nr; r++) {
1735:       for (PetscInt c = 0; c < nc; c++) {
1736:         PetscBool   conjugate = PETSC_FALSE;
1737:         Mat         sub       = mats[r][c];
1738:         PetscScalar vscale = 1.0, vshift = 0.0;
1739:         PetscInt    midx = r * nc + c;

1741:         if (!mumps->nest_convert_to_triples[midx]) continue;
1742:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1743:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1744:         mumps->val = oval + mumps->nest_vals_start[midx];
1745:         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1746:         if (conjugate) {
1747:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1748:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1749:         } else if (vscale != 1.0) {
1750:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1751:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1752:         }
1753:       }
1754:     }
1755:     mumps->val = oval;
1756:   }
1757:   PetscFunctionReturn(PETSC_SUCCESS);
1758: }

1760: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1761: {
1762:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

1764:   PetscFunctionBegin;
1765:   PetscCall(PetscFree(mumps->id.isol_loc));
1766:   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1767:   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1768:   PetscCall(VecDestroy(&mumps->b_seq));
1769:   PetscCall(VecDestroy(&mumps->x_seq));
1770:   PetscCall(PetscFree(mumps->id.perm_in));
1771:   PetscCall(PetscFree(mumps->id.blkvar));
1772:   PetscCall(PetscFree(mumps->id.blkptr));
1773:   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1774:   PetscCall(PetscFree(mumps->val_alloc));
1775:   PetscCall(PetscFree(mumps->info));
1776:   PetscCall(PetscFree(mumps->ICNTL_pre));
1777:   PetscCall(PetscFree(mumps->CNTL_pre));
1778:   PetscCall(MatMumpsResetSchur_Private(mumps));
1779:   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1780:     mumps->id.job = JOB_END;
1781:     PetscMUMPS_c(mumps);
1782:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1783:     if (mumps->mumps_comm != MPI_COMM_NULL) {
1784:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1785:       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1786:     }
1787:   }
1788:   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1789: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1790:   if (mumps->use_petsc_omp_support) {
1791:     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1792:     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1793:     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1794:   }
1795: #endif
1796:   PetscCall(PetscFree(mumps->ia_alloc));
1797:   PetscCall(PetscFree(mumps->ja_alloc));
1798:   PetscCall(PetscFree(mumps->recvcount));
1799:   PetscCall(PetscFree(mumps->reqs));
1800:   PetscCall(PetscFree(mumps->irhs_loc));
1801:   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1802:   PetscCall(PetscFree(mumps->nest_vals));
1803:   PetscCall(PetscFree(A->data));

1805:   /* clear composed functions */
1806:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1807:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1808:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1809:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1810:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1811:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1812:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1813:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1814:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1815:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1816:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1817:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1818:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1819:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1820:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1821:   PetscFunctionReturn(PETSC_SUCCESS);
1822: }

1824: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1825: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1826: {
1827:   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1828:   const PetscMPIInt ompsize = mumps->omp_comm_size;
1829:   PetscInt          i, m, M, rstart;

1831:   PetscFunctionBegin;
1832:   PetscCall(MatGetSize(A, &M, NULL));
1833:   PetscCall(MatGetLocalSize(A, &m, NULL));
1834:   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1835:   if (ompsize == 1) {
1836:     if (!mumps->irhs_loc) {
1837:       mumps->nloc_rhs = (PetscMUMPSInt)m;
1838:       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1839:       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1840:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1841:     }
1842:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1843:   } else {
1844: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1845:     const PetscInt *ranges;
1846:     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1847:     MPI_Group       petsc_group, omp_group;
1848:     PetscScalar    *recvbuf = NULL;

1850:     if (mumps->is_omp_master) {
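            /* Only the omp master of each omp_comm talks to MUMPS, so the RHS pieces owned by the other
               ranks of the sub-communicator are gathered onto it with the MPI_Gatherv below; irhs_loc lists,
               with 1-based indices, the global rows whose RHS values this master ends up holding. */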
1851:       /* Lazily initialize the omp stuff for distributed rhs */
1852:       if (!mumps->irhs_loc) {
1853:         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1854:         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1855:         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1856:         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1857:         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1858:         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

1860:         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1861:         mumps->nloc_rhs = 0;
1862:         PetscCall(MatGetOwnershipRanges(A, &ranges));
1863:         for (j = 0; j < ompsize; j++) {
1864:           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1865:           mumps->nloc_rhs += mumps->rhs_nrow[j];
1866:         }
1867:         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1868:         for (j = k = 0; j < ompsize; j++) {
1869:           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1870:         }

1872:         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1873:         PetscCallMPI(MPI_Group_free(&petsc_group));
1874:         PetscCallMPI(MPI_Group_free(&omp_group));
1875:       }

1877:       /* Reallocate buffers when the current nrhs is bigger than any previously encountered */
1878:       if (nrhs > mumps->max_nrhs) {
1879:         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1880:         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1881:         mumps->max_nrhs = nrhs;
1882:       }

1884:       /* Set up recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1885:       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1886:       mumps->rhs_disps[0] = 0;
1887:       for (j = 1; j < ompsize; j++) {
1888:         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1889:         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1890:       }
1891:       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1892:     }

1894:     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1895:     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

1897:     if (mumps->is_omp_master) {
1898:       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1899:         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1900:         for (j = 0; j < ompsize; j++) {
1901:           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1902:           dst                    = dstbase;
1903:           for (i = 0; i < nrhs; i++) {
1904:             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1905:             src += mumps->rhs_nrow[j];
1906:             dst += mumps->nloc_rhs;
1907:           }
1908:           dstbase += mumps->rhs_nrow[j];
1909:         }
1910:       }
1911:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1912:     }
1913: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1914:   }
1915:   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1916:   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1917:   mumps->id.lrhs_loc = mumps->nloc_rhs;
1918:   mumps->id.irhs_loc = mumps->irhs_loc;
1919:   PetscFunctionReturn(PETSC_SUCCESS);
1920: }

1922: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1923: {
1924:   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1925:   const PetscScalar *barray = NULL;
1926:   PetscScalar       *array;
1927:   IS                 is_iden, is_petsc;
1928:   PetscInt           i;
1929:   PetscBool          second_solve = PETSC_FALSE;
1930:   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

1932:   PetscFunctionBegin;
1933:   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1934:                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1935:                                    &cite1));
1936:   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1937:                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1938:                                    &cite2));

1940:   PetscCall(VecFlag(x, A->factorerrortype));
1941:   if (A->factorerrortype) {
1942:     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1943:     PetscFunctionReturn(PETSC_SUCCESS);
1944:   }

1946:   mumps->id.nrhs = 1;
1947:   if (mumps->petsc_size > 1) {
1948:     if (mumps->ICNTL20 == 10) {
1949:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1950:       PetscCall(VecGetArrayRead(b, &barray));
1951:       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1952:     } else {
1953:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */
1954:       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1955:       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1956:       if (!mumps->myid) {
1957:         PetscCall(VecGetArray(mumps->b_seq, &array));
1958:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959:       }
1960:     }
1961:   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1962:     mumps->id.ICNTL(20) = 0;
1963:     PetscCall(VecCopy(b, x));
1964:     PetscCall(VecGetArray(x, &array));
1965:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1966:   }

1968:   /*
1969:      handle condensation step of Schur complement (if any)
1970:      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1971:      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1972:      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1973:      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1974:   */
1975:   if (mumps->id.size_schur > 0) {
1976:     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1977:     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1978:       second_solve = PETSC_TRUE;
1979:       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1980:       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1981:     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1982:   }

1984:   mumps->id.job = JOB_SOLVE;
1985:   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1986:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

1988:   /* handle expansion step of Schur complement (if any) */
1989:   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1990:   else if (mumps->id.ICNTL(26) == 1) { // the rhs was condensed onto the Schur variables; solve the Schur system and scatter its solution back
1991:     PetscCall(MatMumpsSolveSchur_Private(A));
1992:     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1993:   }

1995:   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1996:     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1997:       /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so recreate scat_sol */
1998:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1999:     }
2000:     if (!mumps->scat_sol) { /* create scatter scat_sol */
2001:       PetscInt *isol2_loc = NULL;
2002:       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2003:       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2004:       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
2005:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2006:       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2007:       PetscCall(ISDestroy(&is_iden));
2008:       PetscCall(ISDestroy(&is_petsc));
2009:       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2010:     }

2012:     PetscScalar *xarray;
2013:     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2014:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2015:     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2016:     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017:     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

2019:     if (mumps->ICNTL20 == 10) { // distributed RHS
2020:       PetscCall(VecRestoreArrayRead(b, &barray));
2021:     } else if (!mumps->myid) { // centralized RHS
2022:       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2023:     }
2024:   } else {
2025:     // id.rhs has the solution in mumps precision
2026:     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2027:     PetscCall(VecRestoreArray(x, &array));
2028:   }

2030:   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2031:   PetscFunctionReturn(PETSC_SUCCESS);
2032: }

2034: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035: {
2036:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2039:   PetscFunctionBegin;
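        /* Per the MUMPS users manual, ICNTL(9) = 1 (its default) solves A x = b and any other value solves
           the transposed system, so a transpose solve just flips this control around a regular MatSolve. */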
2040:   mumps->id.ICNTL(9) = 0;
2041:   PetscCall(MatSolve_MUMPS(A, b, x));
2042:   mumps->id.ICNTL(9) = value;
2043:   PetscFunctionReturn(PETSC_SUCCESS);
2044: }

2046: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2047: {
2048:   Mat                Bt = NULL;
2049:   PetscBool          denseX, denseB, flg, flgT;
2050:   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2051:   PetscInt           i, nrhs, M, nrhsM;
2052:   PetscScalar       *array;
2053:   const PetscScalar *barray;
2054:   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2055:   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2056:   PetscScalar       *sol_loc;
2057:   void              *sol_loc_save;
2058:   PetscCount         sol_loc_len_save;
2059:   IS                 is_to, is_from;
2060:   PetscInt           k, proc, j, m, myrstart;
2061:   const PetscInt    *rstart;
2062:   Vec                v_mpi, msol_loc;
2063:   VecScatter         scat_sol;
2064:   Vec                b_seq;
2065:   VecScatter         scat_rhs;
2066:   PetscScalar       *aa;
2067:   PetscInt           spnr, *ia, *ja;
2068:   Mat_MPIAIJ        *b = NULL;

2070:   PetscFunctionBegin;
2071:   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2072:   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

2074:   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

2076:   if (denseB) {
2077:     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2078:     mumps->id.ICNTL(20) = 0; /* dense RHS */
2079:   } else {                   /* sparse B */
2080:     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2081:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2082:     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2083:     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2084:     /* input B is the transpose of the actual RHS matrix,
2085:        because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2086:     PetscCall(MatTransposeGetMat(B, &Bt));
2087:     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2088:   }

2090:   PetscCall(MatGetSize(B, &M, &nrhs));
2091:   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2092:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2093:   mumps->id.lrhs = (PetscMUMPSInt)M;

2095:   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2096:     PetscScalar *aa;
2097:     PetscInt     spnr, *ia, *ja;
2098:     PetscBool    second_solve = PETSC_FALSE;

2100:     PetscCall(MatDenseGetArray(X, &array));
2101:     if (denseB) {
2102:       /* copy B to X */
2103:       PetscCall(MatDenseGetArrayRead(B, &barray));
2104:       PetscCall(PetscArraycpy(array, barray, nrhsM));
2105:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2106:     } else { /* sparse B */
2107:       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2108:       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2109:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2110:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2111:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2112:     }
2113:     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

2115:     /* handle condensation step of Schur complement (if any) */
2116:     if (mumps->id.size_schur > 0) {
2117:       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2118:         second_solve = PETSC_TRUE;
2119:         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2120:         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
2121:       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2122:     }

2124:     mumps->id.job = JOB_SOLVE;
2125:     PetscMUMPS_c(mumps);
2126:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

2128:     /* handle expansion step of Schur complement (if any) */
2129:     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2130:     else if (mumps->id.ICNTL(26) == 1) { // the rhs was condensed onto the Schur variables; solve the Schur system and scatter its solution back
2131:       PetscCall(MatMumpsSolveSchur_Private(A));
2132:       for (j = 0; j < nrhs; ++j)
2133:         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2134:     }

2136:     if (!denseB) { /* sparse B, restore ia, ja */
2137:       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2138:       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2139:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2140:     }

2142:     // whether B is dense or sparse, the solution is in id.rhs; convert it into the array of X.
2143:     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2144:     PetscCall(MatDenseRestoreArray(X, &array));
2145:     PetscFunctionReturn(PETSC_SUCCESS);
2146:   }

2148:   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2149:   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

2151:   /* create msol_loc to hold mumps local solution */
2152:   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2153:   sol_loc_save          = mumps->id.sol_loc;
2154:   sol_loc_len_save      = mumps->id.sol_loc_len;
2155:   mumps->id.isol_loc    = NULL; // an init state
2156:   mumps->id.sol_loc     = NULL;
2157:   mumps->id.sol_loc_len = 0;

2159:   lsol_loc = mumps->id.lsol_loc;
2160:   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2161:   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2162:   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2163:   mumps->id.isol_loc = isol_loc;

2165:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

2167:   if (denseB) {
2168:     if (mumps->ICNTL20 == 10) {
2169:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2170:       PetscCall(MatDenseGetArrayRead(B, &barray));
2171:       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2172:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2173:       PetscCall(MatGetLocalSize(B, &m, NULL));
2174:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2175:     } else {
2176:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2177:       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here is not realized as an MPI_Gather, resulting in
2178:         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0, then on rank
2179:         0 re-arrange B into the desired order, which is a local operation.
2180:       */

2182:       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2183:       /* wrap dense rhs matrix B into a vector v_mpi */
2184:       PetscCall(MatGetLocalSize(B, &m, NULL));
2185:       PetscCall(MatDenseGetArrayRead(B, &barray));
2186:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2187:       PetscCall(MatDenseRestoreArrayRead(B, &barray));

2189:       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2190:       if (!mumps->myid) {
2191:         PetscInt *idx;
2192:         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2193:         PetscCall(PetscMalloc1(nrhsM, &idx));
2194:         PetscCall(MatGetOwnershipRanges(B, &rstart));
2195:         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2196:           for (j = 0; j < nrhs; j++) {
2197:             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2198:           }
2199:         }

2201:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2202:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2203:         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2204:       } else {
2205:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2206:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2207:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2208:       }

2210:       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2211:       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2212:       PetscCall(ISDestroy(&is_to));
2213:       PetscCall(ISDestroy(&is_from));
2214:       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

2216:       if (!mumps->myid) { /* define rhs on the host */
2217:         PetscCall(VecGetArrayRead(b_seq, &barray));
2218:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2219:         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2220:       }
2221:     }
2222:   } else { /* sparse B */
2223:     b = (Mat_MPIAIJ *)Bt->data;

2225:     /* wrap dense X into a vector v_mpi */
2226:     PetscCall(MatGetLocalSize(X, &m, NULL));
2227:     PetscCall(MatDenseGetArrayRead(X, &barray));
2228:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2229:     PetscCall(MatDenseRestoreArrayRead(X, &barray));

2231:     if (!mumps->myid) {
2232:       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2233:       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2234:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2235:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2236:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2237:     } else {
2238:       mumps->id.irhs_ptr    = NULL;
2239:       mumps->id.irhs_sparse = NULL;
2240:       mumps->id.nz_rhs      = 0;
2241:       if (mumps->id.rhs_sparse_len) {
2242:         PetscCall(PetscFree(mumps->id.rhs_sparse));
2243:         mumps->id.rhs_sparse_len = 0;
2244:       }
2245:     }
2246:   }

2248:   /* solve phase */
2249:   mumps->id.job = JOB_SOLVE;
2250:   PetscMUMPS_c(mumps);
2251:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2253:   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2254:   PetscCall(MatDenseGetArray(X, &array));
2255:   PetscCall(VecPlaceArray(v_mpi, array));

2257:   /* create scatter scat_sol */
2258:   PetscCall(MatGetOwnershipRanges(X, &rstart));
2259:   /* iidx: index for scatter mumps solution to PETSc X */
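        /* v_mpi shares X's storage, so rank p's slice holds its m_p local rows column-major for all nrhs
           columns, starting at global position rstart[p]*nrhs; a solution entry for global row g of the j-th
           rhs therefore goes to rstart[p]*nrhs + (g - rstart[p]) + j*m_p, i.e. iidx + j*m below. For
           illustration: two ranks with 3 rows each and nrhs = 2 put row 4 of the second rhs at
           3*2 + 1 + 1*3 = 10. */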

2261:   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2262:   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2263:   for (i = 0; i < lsol_loc; i++) {
2264:     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

2266:     for (proc = 0; proc < mumps->petsc_size; proc++) {
2267:       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2268:         myrstart = rstart[proc];
2269:         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2270:         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2271:         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2272:         break;
2273:       }
2274:     }

2276:     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2277:   }
2278:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2279:   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2280:   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2281:   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2282:   PetscCall(ISDestroy(&is_from));
2283:   PetscCall(ISDestroy(&is_to));
2284:   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2285:   PetscCall(MatDenseRestoreArray(X, &array));

2287:   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2288:     mumps->id.sol_loc_len = 0;
2289:     PetscCall(PetscFree(mumps->id.sol_loc));
2290:   }

2292:   // restore old values
2293:   mumps->id.sol_loc     = sol_loc_save;
2294:   mumps->id.sol_loc_len = sol_loc_len_save;
2295:   mumps->id.isol_loc    = isol_loc_save;

2297:   PetscCall(PetscFree2(sol_loc, isol_loc));
2298:   PetscCall(PetscFree(idxx));
2299:   PetscCall(VecDestroy(&msol_loc));
2300:   PetscCall(VecDestroy(&v_mpi));
2301:   if (!denseB) {
2302:     if (!mumps->myid) {
2303:       b = (Mat_MPIAIJ *)Bt->data;
2304:       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2305:       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2306:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2307:     }
2308:   } else {
2309:     if (mumps->ICNTL20 == 0) {
2310:       PetscCall(VecDestroy(&b_seq));
2311:       PetscCall(VecScatterDestroy(&scat_rhs));
2312:     }
2313:   }
2314:   PetscCall(VecScatterDestroy(&scat_sol));
2315:   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2316:   PetscFunctionReturn(PETSC_SUCCESS);
2317: }

2319: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320: {
2321:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2324:   PetscFunctionBegin;
2325:   mumps->id.ICNTL(9) = 0;
2326:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327:   mumps->id.ICNTL(9) = value;
2328:   PetscFunctionReturn(PETSC_SUCCESS);
2329: }

2331: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332: {
2333:   PetscBool flg;
2334:   Mat       B;

2336:   PetscFunctionBegin;
2337:   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338:   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

2340:   /* Create B=Bt^T that uses Bt's data structure */
2341:   PetscCall(MatCreateTranspose(Bt, &B));

2343:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344:   PetscCall(MatDestroy(&B));
2345:   PetscFunctionReturn(PETSC_SUCCESS);
2346: }

2348: #if !defined(PETSC_USE_COMPLEX)
2349: /*
2350:   input:
2351:    F:        numeric factor
2352:   output:
2353:    nneg:     total number of negative pivots
2354:    nzero:    total number of zero pivots
2355:    npos:     (global dimension of F) - nneg - nzero
2356: */
2357: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358: {
2359:   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360:   PetscMPIInt size;

2362:   PetscFunctionBegin;
2363:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365:   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));

2367:   if (nneg) *nneg = mumps->id.INFOG(12);
2368:   if (nzero || npos) {
2369:     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2370:     if (nzero) *nzero = mumps->id.INFOG(28);
2371:     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372:   }
2373:   PetscFunctionReturn(PETSC_SUCCESS);
2374: }
2375: #endif
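/*
   A usage sketch for the inertia computation above (illustrative only; real builds of PETSc,
   since MatGetInertia_SBAIJMUMPS is compiled out with complex scalars). It assumes A is a
   symmetric matrix (e.g. MATSBAIJ, or MATAIJ flagged symmetric) and shows the two ICNTL settings
   the checks above require; passing a NULL ordering and a default-initialized MatFactorInfo are
   assumptions of this sketch:

     Mat           F;
     MatFactorInfo info;
     PetscInt      nneg, nzero, npos;
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
     PetscCall(MatMumpsSetIcntl(F, 13, 1)); // skip ScaLAPACK on the root node
     PetscCall(MatMumpsSetIcntl(F, 24, 1)); // enable null pivot row detection
     PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, &info));
     PetscCall(MatCholeskyFactorNumeric(F, A, &info));
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
     PetscCall(MatDestroy(&F));
*/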

2377: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2378: {
2379:   PetscMPIInt    nreqs;
2380:   PetscMUMPSInt *irn, *jcn;
2381:   PetscMPIInt    count;
2382:   PetscCount     totnnz, remain;
2383:   const PetscInt osize = mumps->omp_comm_size;
2384:   PetscScalar   *val;

2386:   PetscFunctionBegin;
2387:   if (osize > 1) {
2388:     if (reuse == MAT_INITIAL_MATRIX) {
2389:       /* master first gathers counts of nonzeros to receive */
2390:       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2391:       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

2393:       /* Then each computes number of send/recvs */
2394:       if (mumps->is_omp_master) {
2395:         /* Start from 1 since self communication is not done in MPI */
2396:         nreqs = 0;
2397:         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2398:       } else {
2399:         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2400:       }
2401:       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

2403:       /* The following code does a simple thing: the omp_master rank gathers irn/jcn/val from the other ranks.
2404:          MPI_Gatherv would suffice if it supported counts > 2^31-1. Since it does not, and mumps->nnz
2405:          might be a prime number > 2^31-1, we have to slice the messages into chunks of at most PETSC_MPI_INT_MAX entries.
2406:          Since omp_comm_size is very small, this approach should have no extra overhead compared to MPI_Gatherv.
2407:        */
2408:       nreqs = 0; /* counter for actual send/recvs */
2409:       if (mumps->is_omp_master) {
2410:         totnnz = 0;

2412:         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2413:         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2414:         PetscCall(PetscMalloc1(totnnz, &val));

2416:         /* Self communication */
2417:         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2418:         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2419:         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

2421:         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2422:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2423:         PetscCall(PetscFree(mumps->val_alloc));
2424:         mumps->nnz = totnnz;
2425:         mumps->irn = irn;
2426:         mumps->jcn = jcn;
2427:         mumps->val = mumps->val_alloc = val;

2429:         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2430:         jcn += mumps->recvcount[0];
2431:         val += mumps->recvcount[0];

2433:         /* Remote communication */
2434:         for (PetscMPIInt i = 1; i < osize; i++) {
2435:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2436:           remain = mumps->recvcount[i] - count;
2437:           while (count > 0) {
2438:             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439:             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2440:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2441:             irn += count;
2442:             jcn += count;
2443:             val += count;
2444:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2445:             remain -= count;
2446:           }
2447:         }
2448:       } else {
2449:         irn    = mumps->irn;
2450:         jcn    = mumps->jcn;
2451:         val    = mumps->val;
2452:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453:         remain = mumps->nnz - count;
2454:         while (count > 0) {
2455:           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456:           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2457:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2458:           irn += count;
2459:           jcn += count;
2460:           val += count;
2461:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2462:           remain -= count;
2463:         }
2464:       }
2465:     } else {
2466:       nreqs = 0;
2467:       if (mumps->is_omp_master) {
2468:         val = mumps->val + mumps->recvcount[0];
2469:         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2470:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2471:           remain = mumps->recvcount[i] - count;
2472:           while (count > 0) {
2473:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2474:             val += count;
2475:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2476:             remain -= count;
2477:           }
2478:         }
2479:       } else {
2480:         val    = mumps->val;
2481:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2482:         remain = mumps->nnz - count;
2483:         while (count > 0) {
2484:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2485:           val += count;
2486:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2487:           remain -= count;
2488:         }
2489:       }
2490:     }
2491:     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2492:     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
2493:   }
2494:   PetscFunctionReturn(PETSC_SUCCESS);
2495: }
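/*
   The slicing pattern used in the routine above, reduced to its essence (illustrative sketch,
   not compiled here; buf, total, tag, comm, reqs and nreqs are placeholder names). A buffer of
   `total` entries, which may exceed 2^31-1, is sent in chunks of at most PETSC_MPI_INT_MAX
   entries that all share one tag:

     PetscCount  remain = total;
     PetscMPIInt count  = (PetscMPIInt)PetscMin(remain, (PetscCount)PETSC_MPI_INT_MAX);
     remain -= count;
     while (count > 0) {
       PetscCallMPI(MPIU_Isend(buf, count, MPIU_SCALAR, 0, tag, comm, &reqs[nreqs++]));
       buf += count;
       count = (PetscMPIInt)PetscMin(remain, (PetscCount)PETSC_MPI_INT_MAX);
       remain -= count;
     }
     // the receiver posts matching MPIU_Irecv() calls chunk by chunk, and both sides finish
     // with MPI_Waitall(), exactly as done above
*/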

2497: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498: {
2499:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2501:   PetscFunctionBegin;
2502:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
2503:     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504:     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505:     PetscFunctionReturn(PETSC_SUCCESS);
2506:   }

2508:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

2511:   /* numerical factorization phase */
2512:   mumps->id.job = JOB_FACTNUMERIC;
2513:   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514:     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515:   } else {
2516:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517:   }

2519:   if (F->schur) {
2520:     const PetscScalar *array;
2521:     MUMPS_INT          size = mumps->id.size_schur;
2522:     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524:     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525:   }

2527:   if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix)));

2529:   PetscMUMPS_c(mumps);
2530:   if (mumps->id.INFOG(1) < 0) {
2531:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2532:     if (mumps->id.INFOG(1) == -10) {
2533:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534:       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2535:     } else if (mumps->id.INFOG(1) == -13) {
2536:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2537:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2538:     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2539:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2540:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2541:     } else {
2542:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2543:       F->factorerrortype = MAT_FACTOR_OTHER;
2544:     }
2545:   }
2546:   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));

2548:   F->assembled = PETSC_TRUE;

2550:   if (F->schur) { /* reset Schur status to unfactored */
2551: #if defined(PETSC_HAVE_CUDA)
2552:     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2553: #endif
2554:     PetscScalar *array;
2555:     PetscCall(MatDenseGetArray(F->schur, &array));
2556:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2557:     PetscCall(MatDenseRestoreArray(F->schur, &array));
2558:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2559:       mumps->id.ICNTL(19) = 2;
2560:       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2561:     }
2562:     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2563:   }

2565:   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2566:   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

2568:   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2569:   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2570:   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2571:   // which the solution (in SOL_loc) is available on the local processor.
2572:   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2573:   // normally have the same contents for each of these calls. The only exception is the case of
2574:   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2575:   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2576:   if (mumps->petsc_size > 1) {
2577:     PetscInt     lsol_loc;
2578:     PetscScalar *array;

2580:     /* distributed solution; Create x_seq=sol_loc for repeated use */
2581:     if (mumps->x_seq) {
2582:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2583:       PetscCall(PetscFree(mumps->id.isol_loc));
2584:       PetscCall(VecDestroy(&mumps->x_seq));
2585:     }
2586:     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2587:     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2588:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2589:     PetscCall(VecGetArray(mumps->x_seq, &array));
2590:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2591:     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2592:     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2593:   }
2594:   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2595:   PetscFunctionReturn(PETSC_SUCCESS);
2596: }
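/*
   After the numeric factorization above, the MUMPS diagnostics it relies on (INFOG/RINFOG) can be
   queried from user code through the PETSc wrappers; a small sketch (illustrative only, F being a
   MUMPS factor as produced by the routine above):

     PetscInt  infog1, infog28;
     PetscReal rinfog3;
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));   // INFOG(1) < 0 indicates an error
     PetscCall(MatMumpsGetInfog(F, 28, &infog28)); // number of null pivots when ICNTL(24)=1
     PetscCall(MatMumpsGetRinfog(F, 3, &rinfog3)); // flops for the elimination
     if (infog1 < 0) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MUMPS reported INFOG(1)=%" PetscInt_FMT "\n", infog1));
*/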

2598: /* Sets MUMPS options from the options database */
2599: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2600: {
2601:   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2602:   PetscReal     cntl;
2603:   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2604:   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2605:   PetscBool     flg   = PETSC_FALSE;
2606:   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2607:   void         *arr;

2609:   PetscFunctionBegin;
2610:   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2611:   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2612:     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2613:     PetscInt       nthreads   = 0;
2614:     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2615:     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2616:     PetscMUMPSInt  nblk, *blkvar, *blkptr;

2618:     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2619:     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2620:     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

2622:     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2623:     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2624:     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2625:     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2626:     if (mumps->use_petsc_omp_support) {
2627:       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2628: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2629:       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2630:       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2631: #else
2632:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2633:               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2634: #endif
2635:     } else {
2636:       mumps->omp_comm      = PETSC_COMM_SELF;
2637:       mumps->mumps_comm    = mumps->petsc_comm;
2638:       mumps->is_omp_master = PETSC_TRUE;
2639:     }
2640:     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2641:     mumps->reqs = NULL;
2642:     mumps->tag  = 0;

2644:     if (mumps->mumps_comm != MPI_COMM_NULL) {
2645:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2646:         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2647:         MPI_Comm comm;
2648:         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2649:         mumps->mumps_comm = comm;
2650:       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2651:     }

2653:     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2654:     mumps->id.job          = JOB_INIT;
2655:     mumps->id.par          = 1; /* host participates in factorization and solve */
2656:     mumps->id.sym          = mumps->sym;

2658:     size          = mumps->id.size_schur;
2659:     arr           = mumps->id.schur;
2660:     listvar_schur = mumps->id.listvar_schur;
2661:     nblk          = mumps->id.nblk;
2662:     blkvar        = mumps->id.blkvar;
2663:     blkptr        = mumps->id.blkptr;
2664:     if (PetscDefined(USE_DEBUG)) {
2665:       for (PetscInt i = 0; i < size; i++)
2666:         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2667:                    A->rmap->N);
2668:     }

2670:     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2671:     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2672:     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed when the requested precision differs from that of PetscScalar");
2673:     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));

2675:     PetscMUMPS_c(mumps);
2676:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2678:     /* set PETSc-MUMPS default options - override MUMPS default */
2679:     mumps->id.ICNTL(3) = 0;
2680:     mumps->id.ICNTL(4) = 0;
2681:     if (mumps->petsc_size == 1) {
2682:       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2683:       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2684:     } else {
2685:       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2686:       mumps->id.ICNTL(21) = 1; /* distributed solution */
2687:     }
2688:     if (nblk && blkptr) {
2689:       mumps->id.ICNTL(15) = 1;
2690:       mumps->id.nblk      = nblk;
2691:       mumps->id.blkvar    = blkvar;
2692:       mumps->id.blkptr    = blkptr;
2693:     } else mumps->id.ICNTL(15) = 0;

2695:     /* restore cached ICNTL and CNTL values */
2696:     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2697:     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);

2699:     PetscCall(PetscFree(mumps->ICNTL_pre));
2700:     PetscCall(PetscFree(mumps->CNTL_pre));

2702:     if (schur) {
2703:       mumps->id.size_schur    = size;
2704:       mumps->id.schur_lld     = size;
2705:       mumps->id.schur         = arr;
2706:       mumps->id.listvar_schur = listvar_schur;
2707:       if (mumps->petsc_size > 1) {
2708:         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

2710:         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2711:         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2712:         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2713:         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2714:       } else {
2715:         if (F->factortype == MAT_FACTOR_LU) {
2716:           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2717:         } else {
2718:           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2719:         }
2720:       }
2721:       mumps->id.ICNTL(26) = -1;
2722:     }

2724:     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2725:        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2726:      */
2727:     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2728:     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));

2730:     mumps->scat_rhs = NULL;
2731:     mumps->scat_sol = NULL;
2732:   }
2733:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2734:   if (flg) mumps->id.ICNTL(1) = icntl;
2735:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2736:   if (flg) mumps->id.ICNTL(2) = icntl;
2737:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2738:   if (flg) mumps->id.ICNTL(3) = icntl;

2740:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2741:   if (flg) mumps->id.ICNTL(4) = icntl;
2742:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */

2744:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2745:   if (flg) mumps->id.ICNTL(6) = icntl;

2747:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2748:   if (flg) {
2749:     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2750:     mumps->id.ICNTL(7) = icntl;
2751:   }

2753:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2754:   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2755:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2756:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2757:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2758:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2759:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2760:   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2761:   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2762:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2763:   if (flg) {
2764:     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The absolute value of -mat_mumps_icntl_15 must be a multiple of the row and column block sizes");
2765:     else if (mumps->id.ICNTL(15) > 0) {
2766:       const PetscInt *bsizes;
2767:       PetscInt        nblocks, p, *blkptr = NULL;
2768:       PetscMPIInt    *recvcounts, *displs, n;
2769:       PetscMPIInt     rank, size = 0;

2771:       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2772:       flg = PETSC_TRUE;
2773:       for (p = 0; p < nblocks; ++p) {
2774:         if (bsizes[p] > 1) break;
2775:       }
2776:       if (p == nblocks) flg = PETSC_FALSE;
2777:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2778:       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2779:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2780:         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2781:         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2782:         PetscCall(PetscMPIIntCast(nblocks, &n));
2783:         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2784:         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2785:         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2786:         PetscCallMPI(MPI_Bcast(displs + size, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); /* displs[] is PetscMPIInt, so use MPI_INT */
2787:         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2788:         if (rank == 0) {
2789:           blkptr[0] = 1;
2790:           for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p]; /* prefix-sum over all gathered block sizes */
2791:           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2792:         }
2793:         PetscCall(PetscFree2(recvcounts, displs));
2794:         PetscCall(PetscFree(blkptr));
2795:       }
2796:     }
2797:   }
2798:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2799:   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2800:     PetscCall(MatDestroy(&F->schur));
2801:     PetscCall(MatMumpsResetSchur_Private(mumps));
2802:   }

2804:   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2805:      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2806:      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2807:      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2808:      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2809:      In short, we could not use distributed RHS with MPICH until v4.0b1, or unless the workaround in MUMPS 5.6.2+ is enabled.
2810:    */
2811:   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2812: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2813:   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2814: #endif
2815:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2816:   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2817: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2818:   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2819: #endif
2820:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

2822:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg));
2823:   if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))'
2824:   if (mumps->id.ICNTL(22)) {
2825:     // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by user.
2826:     // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
2827:     PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL));
2828:   }
2829:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2830:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2831:   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */

2833:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2834:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2835:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2836:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2837:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2838:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2839:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2840:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2841:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2842:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2843:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2844:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2845:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2846:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2847:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2848:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2849:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

2851:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2852:   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2853:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2854:   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2855:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2856:   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2857:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2858:   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2859:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2860:   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2861:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2862:   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);

2864:   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2865:   if (ninfo) {
2866:     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2867:     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2868:     mumps->ninfo = ninfo;
2869:     for (i = 0; i < ninfo; i++) {
2870:       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2871:       mumps->info[i] = info[i];
2872:     }
2873:   }
2874:   PetscOptionsEnd();
2875:   PetscFunctionReturn(PETSC_SUCCESS);
2876: }
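/*
   The controls parsed above can be set either from the options database or programmatically on the
   factor matrix; a short sketch (illustrative only, the numeric values are arbitrary examples and pc
   is assumed to be a PCLU/PCCHOLESKY preconditioner using MATSOLVERMUMPS):

     command line:  -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 60 -mat_mumps_cntl_1 0.01

     Mat F;
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc)); // create the factor matrix so it can be accessed
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 60));    // ICNTL(14): percentage increase of the working space
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));    // CNTL(1): relative pivoting threshold
*/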

2878: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2879: {
2880:   PetscFunctionBegin;
2881:   if (mumps->id.INFOG(1) < 0) {
2882:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2883:     if (mumps->id.INFOG(1) == -6) {
2884:       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885:       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2886:     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2887:       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2888:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2889:     } else {
2890:       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2891:       F->factorerrortype = MAT_FACTOR_OTHER;
2892:     }
2893:   }
2894:   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2895:   PetscFunctionReturn(PETSC_SUCCESS);
2896: }

2898: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2899: {
2900:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2901:   Vec            b;
2902:   const PetscInt M = A->rmap->N;

2904:   PetscFunctionBegin;
2905:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2906:     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2907:     PetscFunctionReturn(PETSC_SUCCESS);
2908:   }

2910:   /* Set MUMPS options from the options database */
2911:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2913:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2914:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2916:   /* analysis phase */
2917:   mumps->id.job = JOB_FACTSYMBOLIC;
2918:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2919:   switch (mumps->id.ICNTL(18)) {
2920:   case 0: /* centralized assembled matrix input */
2921:     if (!mumps->myid) {
2922:       mumps->id.nnz = mumps->nnz;
2923:       mumps->id.irn = mumps->irn;
2924:       mumps->id.jcn = mumps->jcn;
2925:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2926:       if (r && mumps->id.ICNTL(7) == 7) {
2927:         mumps->id.ICNTL(7) = 1;
2928:         if (!mumps->myid) {
2929:           const PetscInt *idx;
2930:           PetscInt        i;

2932:           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2933:           PetscCall(ISGetIndices(r, &idx));
2934:           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2935:           PetscCall(ISRestoreIndices(r, &idx));
2936:         }
2937:       }
2938:     }
2939:     break;
2940:   case 3: /* distributed assembled matrix input (size>1) */
2941:     mumps->id.nnz_loc = mumps->nnz;
2942:     mumps->id.irn_loc = mumps->irn;
2943:     mumps->id.jcn_loc = mumps->jcn;
2944:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2945:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2946:       PetscCall(MatCreateVecs(A, NULL, &b));
2947:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2948:       PetscCall(VecDestroy(&b));
2949:     }
2950:     break;
2951:   }
2952:   PetscMUMPS_c(mumps);
2953:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

2955:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2956:   F->ops->solve             = MatSolve_MUMPS;
2957:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2958:   F->ops->matsolve          = MatMatSolve_MUMPS;
2959:   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2960:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2962:   mumps->matstruc = SAME_NONZERO_PATTERN;
2963:   PetscFunctionReturn(PETSC_SUCCESS);
2964: }
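/*
   A direct (non-PC) way to exercise the symbolic/numeric/solve path set up above (illustrative
   sketch; A is an assembled MATAIJ matrix, b and x are conforming vectors, and passing NULL
   row/column orderings is an assumption of this sketch - otherwise supply ISes from MatGetOrdering()):

     Mat           F;
     MatFactorInfo info;
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info)); // ends up in MatLUFactorSymbolic_AIJMUMPS()
     PetscCall(MatLUFactorNumeric(F, A, &info));              // ends up in MatFactorNumeric_MUMPS()
     PetscCall(MatSolve(F, b, x));                            // ends up in MatSolve_MUMPS()
     PetscCall(MatDestroy(&F));
*/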

2966: /* Note the PETSc r and c permutations are ignored */
2967: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2968: {
2969:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2970:   Vec            b;
2971:   const PetscInt M = A->rmap->N;

2973:   PetscFunctionBegin;
2974:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2975:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2976:     PetscFunctionReturn(PETSC_SUCCESS);
2977:   }

2979:   /* Set MUMPS options from the options database */
2980:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2982:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2983:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2985:   /* analysis phase */
2986:   mumps->id.job = JOB_FACTSYMBOLIC;
2987:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2988:   switch (mumps->id.ICNTL(18)) {
2989:   case 0: /* centralized assembled matrix input */
2990:     if (!mumps->myid) {
2991:       mumps->id.nnz = mumps->nnz;
2992:       mumps->id.irn = mumps->irn;
2993:       mumps->id.jcn = mumps->jcn;
2994:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2995:     }
2996:     break;
2997:   case 3: /* distributed assembled matrix input (size>1) */
2998:     mumps->id.nnz_loc = mumps->nnz;
2999:     mumps->id.irn_loc = mumps->irn;
3000:     mumps->id.jcn_loc = mumps->jcn;
3001:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3002:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3003:       PetscCall(MatCreateVecs(A, NULL, &b));
3004:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3005:       PetscCall(VecDestroy(&b));
3006:     }
3007:     break;
3008:   }
3009:   PetscMUMPS_c(mumps);
3010:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

3012:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3013:   F->ops->solve             = MatSolve_MUMPS;
3014:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3015:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

3017:   mumps->matstruc = SAME_NONZERO_PATTERN;
3018:   PetscFunctionReturn(PETSC_SUCCESS);
3019: }

3021: /* Note the PETSc r permutation and factor info are ignored */
3022: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3023: {
3024:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3025:   Vec            b;
3026:   const PetscInt M = A->rmap->N;

3028:   PetscFunctionBegin;
3029:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3030:     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3031:     PetscFunctionReturn(PETSC_SUCCESS);
3032:   }

3034:   /* Set MUMPS options from the options database */
3035:   PetscCall(MatSetFromOptions_MUMPS(F, A));

3037:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3038:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

3040:   /* analysis phase */
3041:   mumps->id.job = JOB_FACTSYMBOLIC;
3042:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3043:   switch (mumps->id.ICNTL(18)) {
3044:   case 0: /* centralized assembled matrix input */
3045:     if (!mumps->myid) {
3046:       mumps->id.nnz = mumps->nnz;
3047:       mumps->id.irn = mumps->irn;
3048:       mumps->id.jcn = mumps->jcn;
3049:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3050:     }
3051:     break;
3052:   case 3: /* distributed assembled matrix input (size>1) */
3053:     mumps->id.nnz_loc = mumps->nnz;
3054:     mumps->id.irn_loc = mumps->irn;
3055:     mumps->id.jcn_loc = mumps->jcn;
3056:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3057:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3058:       PetscCall(MatCreateVecs(A, NULL, &b));
3059:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3060:       PetscCall(VecDestroy(&b));
3061:     }
3062:     break;
3063:   }
3064:   PetscMUMPS_c(mumps);
3065:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

3067:   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3068:   F->ops->solve                 = MatSolve_MUMPS;
3069:   F->ops->solvetranspose        = MatSolve_MUMPS;
3070:   F->ops->matsolve              = MatMatSolve_MUMPS;
3071:   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3072:   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3073: #if defined(PETSC_USE_COMPLEX)
3074:   F->ops->getinertia = NULL;
3075: #else
3076:   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3077: #endif

3079:   mumps->matstruc = SAME_NONZERO_PATTERN;
3080:   PetscFunctionReturn(PETSC_SUCCESS);
3081: }

3083: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3084: {
3085:   PetscBool         isascii;
3086:   PetscViewerFormat format;
3087:   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

3089:   PetscFunctionBegin;
3090:   /* check if matrix is mumps type */
3091:   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

3093:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3094:   if (isascii) {
3095:     PetscCall(PetscViewerGetFormat(viewer, &format));
3096:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3097:       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3098:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3099:         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3100:         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3101:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3102:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3103:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3104:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3105:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3106:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3107:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3108:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3109:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3110:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3111:         if (mumps->id.ICNTL(11) > 0) {
3112:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3113:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3114:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3115:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3116:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3117:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3118:         }
3119:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3120:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3121:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3122:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3123:         /* ICNTL(15-17) not used */
3124:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3125:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3126:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3127:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3128:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3129:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

3131:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3132:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3133:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3134:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3135:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3136:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

3138:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3139:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
3140:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3141:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3142:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3143:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3144:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3145:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3146:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3147:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3148:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

3150:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3151:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3152:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3153:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3154:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3155:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

3157:         /* information local to each processor */
3158:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3159:         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3160:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3161:         PetscCall(PetscViewerFlush(viewer));
3162:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3163:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3164:         PetscCall(PetscViewerFlush(viewer));
3165:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3166:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3167:         PetscCall(PetscViewerFlush(viewer));

3169:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
3170:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3171:         PetscCall(PetscViewerFlush(viewer));

3173:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
3174:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3175:         PetscCall(PetscViewerFlush(viewer));

3177:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3178:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3179:         PetscCall(PetscViewerFlush(viewer));

3181:         if (mumps->ninfo && mumps->ninfo <= 80) {
3182:           PetscInt i;
3183:           for (i = 0; i < mumps->ninfo; i++) {
3184:             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3185:             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3186:             PetscCall(PetscViewerFlush(viewer));
3187:           }
3188:         }
3189:         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3190:       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

3192:       if (mumps->myid == 0) { /* information from the host */
3193:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3194:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3195:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3196:         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

3198:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3199:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3200:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3201:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3202:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3203:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3204:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3205:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3206:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3207:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3208:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3209:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3210:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3211:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3212:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3213:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3214:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3215:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3216:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3217:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3218:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3219:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3220:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3221:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3222:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3223:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3224:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3225:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3226:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3227:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3228:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3229:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3230:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3231:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3232:       }
3233:     }
3234:   }
3235:   PetscFunctionReturn(PETSC_SUCCESS);
3236: }

3238: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3239: {
3240:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

3242:   PetscFunctionBegin;
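  /* Note: INFOG(20) is the estimated number of entries in the factors; per the MUMPS manual a negative
     value encodes that count in millions, hence the -1000000 scaling below */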
3243:   info->block_size        = 1.0;
3244:   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3245:   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3246:   info->nz_unneeded       = 0.0;
3247:   info->assemblies        = 0.0;
3248:   info->mallocs           = 0.0;
3249:   info->memory            = 0.0;
3250:   info->fill_ratio_given  = 0;
3251:   info->fill_ratio_needed = 0;
3252:   info->factor_mallocs    = 0;
3253:   PetscFunctionReturn(PETSC_SUCCESS);
3254: }

3256: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3257: {
3258:   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3259:   const PetscScalar *arr;
3260:   const PetscInt    *idxs;
3261:   PetscInt           size, i;

3263:   PetscFunctionBegin;
3264:   PetscCall(ISGetLocalSize(is, &size));
3265:   /* Schur complement matrix */
3266:   PetscCall(MatDestroy(&F->schur));
3267:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3268:   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3269:   // don't allocate mumps->id.schur[] now as its precision is not yet known
3270:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3271:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3272:   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3273:   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

3275:   /* MUMPS expects Fortran style indices */
3276:   PetscCall(PetscFree(mumps->id.listvar_schur));
3277:   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3278:   PetscCall(ISGetIndices(is, &idxs));
3279:   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3280:   PetscCall(ISRestoreIndices(is, &idxs));
3281:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3282:   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3283:   else mumps->ICNTL26 = -1;
3284:   PetscFunctionReturn(PETSC_SUCCESS);
3285: }

3287: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3288: {
3289:   Mat          St;
3290:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3291:   PetscScalar *array;
3292:   PetscInt     i, j, N = mumps->id.size_schur;

3294:   PetscFunctionBegin;
3295:   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3296:   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3297:   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3298:   PetscCall(MatSetType(St, MATDENSE));
3299:   PetscCall(MatSetUp(St));
3300:   PetscCall(MatDenseGetArray(St, &array));
3301:   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3302:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3303:       for (i = 0; i < N; i++) {
3304:         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3305:       }
3306:     } else { /* stored by columns */
3307:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3308:     }
3309:   } else {                          /* either full or lower-triangular (not packed) */
3310:     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3311:       for (i = 0; i < N; i++) {
3312:         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3313:       }
3314:     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3315:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3316:     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3317:       for (i = 0; i < N; i++) {
3318:         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3319:       }
3320:     }
3321:   }
3322:   PetscCall(MatDenseRestoreArray(St, &array));
3323:   *S = St;
3324:   PetscFunctionReturn(PETSC_SUCCESS);
3325: }

3327: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3328: {
3329:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3331:   PetscFunctionBegin;
3332:   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3333:     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3334:     for (i = 0; i < nICNTL_pre; ++i)
3335:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3336:     if (i == nICNTL_pre) {                             /* not already cached */
3337:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3338:       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3339:       mumps->ICNTL_pre[0]++;
3340:     }
3341:     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3342:     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3343:   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3344:   PetscFunctionReturn(PETSC_SUCCESS);
3345: }

3347: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3348: {
3349:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3351:   PetscFunctionBegin;
3352:   if (mumps->id.job == JOB_NULL) {
3353:     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3354:     *ival = 0;
3355:     for (i = 0; i < nICNTL_pre; ++i) {
3356:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3357:     }
3358:   } else *ival = mumps->id.ICNTL(icntl);
3359:   PetscFunctionReturn(PETSC_SUCCESS);
3360: }

3362: /*@
3363:   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3365:   Logically Collective

3367:   Input Parameters:
3368: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3369: . icntl - index of MUMPS parameter array `ICNTL()`
3370: - ival  - value of MUMPS `ICNTL(icntl)`

3372:   Options Database Key:
3373: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

3375:   Level: beginner

3377:   Note:
3378:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
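
  Example Usage:
  A minimal sketch, assuming `F` was obtained from `MatGetFactor()` with `MATSOLVERMUMPS`; it raises the estimated working-space increase before the factorization is set up
.vb
  PetscCall(MatMumpsSetIcntl(F, 14, 30)); // ICNTL(14): increase the estimated working space by 30 percent
.ve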

3380: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3381: @*/
3382: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3383: {
3384:   PetscFunctionBegin;
3386:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3389:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3390:   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3391:   PetscFunctionReturn(PETSC_SUCCESS);
3392: }

3394: /*@
3395:   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3397:   Logically Collective

3399:   Input Parameters:
3400: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3401: - icntl - index of MUMPS parameter array ICNTL()

3403:   Output Parameter:
3404: . ival - value of MUMPS ICNTL(icntl)

3406:   Level: beginner
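
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor matrix, that queries the current value of ICNTL(24) (null pivot detection)
.vb
  PetscInt ival;
  PetscCall(MatMumpsGetIcntl(F, 24, &ival));
.ve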

3408: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3409: @*/
3410: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3411: {
3412:   PetscFunctionBegin;
3414:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3416:   PetscAssertPointer(ival, 3);
3417:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3418:   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3419:   PetscFunctionReturn(PETSC_SUCCESS);
3420: }

3422: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3423: {
3424:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3426:   PetscFunctionBegin;
3427:   if (mumps->id.job == JOB_NULL) {
3428:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3429:     for (i = 0; i < nCNTL_pre; ++i)
3430:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3431:     if (i == nCNTL_pre) {
3432:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3433:       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3434:       mumps->CNTL_pre[0]++;
3435:     }
3436:     mumps->CNTL_pre[1 + 2 * i] = icntl;
3437:     mumps->CNTL_pre[2 + 2 * i] = val;
3438:   } else ID_CNTL_SET(mumps->id, icntl, val);
3439:   PetscFunctionReturn(PETSC_SUCCESS);
3440: }

3442: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3443: {
3444:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3446:   PetscFunctionBegin;
3447:   if (mumps->id.job == JOB_NULL) {
3448:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3449:     *val = 0.0;
3450:     for (i = 0; i < nCNTL_pre; ++i) {
3451:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3452:     }
3453:   } else *val = ID_CNTL_GET(mumps->id, icntl);
3454:   PetscFunctionReturn(PETSC_SUCCESS);
3455: }

3457: /*@C
3458:   MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

3460:   Logically Collective

3462:   Input Parameters:
3463: + F      - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
3464: - tmpdir - temporary directory for out-of-core facility.

3466:   Level: beginner

3468:   Note:
3469:   To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`.
3470:   If `ooc_tmpdir` is not set, MUMPS also checks the environment variable `MUMPS_OOC_TMPDIR`; if neither is defined, it uses /tmp by default.
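
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor matrix and /scratch/mumps_ooc is a hypothetical writable directory
.vb
  PetscCall(MatMumpsSetIcntl(F, 22, 1));                    // ICNTL(22): enable out-of-core factorization
  PetscCall(MatMumpsSetOocTmpDir(F, "/scratch/mumps_ooc"));
.ve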

3472: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3473: @*/
3474: PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir)
3475: {
3476:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3478:   PetscFunctionBegin;
3481:   PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir)));
3482:   PetscFunctionReturn(PETSC_SUCCESS);
3483: }

3485: /*@C
3486:   MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

3488:   Logically Collective

3490:   Input Parameter:
3491: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.

3493:   Output Parameter:
3494: . tmpdir - temporary directory for out-of-core facility.

3496:   Level: beginner

3498:   Note:
3499:   The returned string is read-only and the user should not modify it.

3501: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3502: @*/
3503: PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir)
3504: {
3505:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3507:   PetscFunctionBegin;
3510:   if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir;
3511:   PetscFunctionReturn(PETSC_SUCCESS);
3512: }

3514: /*@
3515:   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3517:   Logically Collective

3519:   Input Parameters:
3520: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3521: . icntl - index of MUMPS parameter array `CNTL()`
3522: - val   - value of MUMPS `CNTL(icntl)`

3524:   Options Database Key:
3525: . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

3527:   Level: beginner

3529:   Note:
3530:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
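
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor matrix, that tightens the relative pivoting threshold
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.1)); // CNTL(1): relative pivoting threshold
.ve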

3532: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3533: @*/
3534: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3535: {
3536:   PetscFunctionBegin;
3538:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3541:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3542:   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3543:   PetscFunctionReturn(PETSC_SUCCESS);
3544: }

3546: /*@
3547:   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3549:   Logically Collective

3551:   Input Parameters:
3552: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3553: - icntl - index of MUMPS parameter array CNTL()

3555:   Output Parameter:
3556: . val - value of MUMPS CNTL(icntl)

3558:   Level: beginner

3560: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3561: @*/
3562: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3563: {
3564:   PetscFunctionBegin;
3566:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3568:   PetscAssertPointer(val, 3);
3569:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3570:   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3571:   PetscFunctionReturn(PETSC_SUCCESS);
3572: }

3574: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3575: {
3576:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3578:   PetscFunctionBegin;
3579:   *info = mumps->id.INFO(icntl);
3580:   PetscFunctionReturn(PETSC_SUCCESS);
3581: }

3583: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3584: {
3585:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3587:   PetscFunctionBegin;
3588:   *infog = mumps->id.INFOG(icntl);
3589:   PetscFunctionReturn(PETSC_SUCCESS);
3590: }

3592: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3593: {
3594:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3596:   PetscFunctionBegin;
3597:   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3598:   PetscFunctionReturn(PETSC_SUCCESS);
3599: }

3601: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3602: {
3603:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3605:   PetscFunctionBegin;
3606:   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3607:   PetscFunctionReturn(PETSC_SUCCESS);
3608: }

3610: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3611: {
3612:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3614:   PetscFunctionBegin;
3615:   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3616:   *size  = 0;
3617:   *array = NULL;
3618:   if (!mumps->myid) {
3619:     *size = mumps->id.INFOG(28);
3620:     PetscCall(PetscMalloc1(*size, array));
3621:     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3622:   }
3623:   PetscFunctionReturn(PETSC_SUCCESS);
3624: }

3626: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3627: {
3628:   Mat          Bt = NULL, Btseq = NULL;
3629:   PetscBool    flg;
3630:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3631:   PetscScalar *aa;
3632:   PetscInt     spnr, *ia, *ja, M, nrhs;

3634:   PetscFunctionBegin;
3635:   PetscAssertPointer(spRHS, 2);
3636:   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3637:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be a MATTRANSPOSEVIRTUAL matrix");
3638:   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3639:   PetscCall(MatTransposeGetMat(spRHS, &Bt));

3641:   PetscCall(MatMumpsSetIcntl(F, 30, 1));

3643:   if (mumps->petsc_size > 1) {
3644:     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3645:     Btseq         = b->A;
3646:   } else {
3647:     Btseq = Bt;
3648:   }

3650:   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3651:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3652:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3653:   mumps->id.rhs = NULL;

3655:   if (!mumps->myid) {
3656:     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3657:     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3658:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3659:     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3660:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3661:   } else {
3662:     mumps->id.irhs_ptr    = NULL;
3663:     mumps->id.irhs_sparse = NULL;
3664:     mumps->id.nz_rhs      = 0;
3665:     if (mumps->id.rhs_sparse_len) {
3666:       PetscCall(PetscFree(mumps->id.rhs_sparse));
3667:       mumps->id.rhs_sparse_len = 0;
3668:     }
3669:   }
3670:   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3671:   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

3673:   /* solve phase */
3674:   mumps->id.job = JOB_SOLVE;
3675:   PetscMUMPS_c(mumps);
3676:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

3678:   if (!mumps->myid) {
3679:     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3680:     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3681:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3682:   }
3683:   PetscFunctionReturn(PETSC_SUCCESS);
3684: }

3686: /*@
3687:   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

3689:   Logically Collective

3691:   Input Parameter:
3692: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3694:   Output Parameter:
3695: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

3697:   Level: beginner
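
  Example Usage:
  A sketch (the name `Bt` is illustrative): the nonzero pattern of a sequential `MATAIJ` matrix `Bt` selects which entries of the
  inverse are requested; wrapping it with `MatCreateTranspose()` produces the `spRHS` argument, and on return `Bt` holds the
  requested values of the inverse
.vb
  Mat Bt, spRHS;
  // ... assemble Bt with the desired sparsity pattern ...
  PetscCall(MatCreateTranspose(Bt, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
.ve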

3699: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3700: @*/
3701: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3702: {
3703:   PetscFunctionBegin;
3705:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3706:   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3707:   PetscFunctionReturn(PETSC_SUCCESS);
3708: }

3710: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3711: {
3712:   Mat spRHS;

3714:   PetscFunctionBegin;
3715:   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3716:   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3717:   PetscCall(MatDestroy(&spRHS));
3718:   PetscFunctionReturn(PETSC_SUCCESS);
3719: }

3721: /*@
3722:   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>

3724:   Logically Collective

3726:   Input Parameter:
3727: . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3729:   Output Parameter:
3730: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$

3732:   Level: beginner

3734: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3735: @*/
3736: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3737: {
3738:   PetscBool flg;

3740:   PetscFunctionBegin;
3742:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3743:   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3744:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix");
3745:   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3746:   PetscFunctionReturn(PETSC_SUCCESS);
3747: }

3749: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3750: {
3751:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3753:   PetscFunctionBegin;
3754:   if (nblk) {
3755:     PetscAssertPointer(blkptr, 4);
3756:     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3757:     PetscCall(PetscFree(mumps->id.blkptr));
3758:     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3759:     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3760:     // mumps->id.icntl[] might not have been allocated yet (that is done in MatSetFromOptions_MUMPS()), so we don't assign ICNTL(15) here.
3761:     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3762:     // mumps->id.ICNTL(15) = 1;
3763:     if (blkvar) {
3764:       PetscCall(PetscFree(mumps->id.blkvar));
3765:       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3766:       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3767:     }
3768:   } else {
3769:     PetscCall(PetscFree(mumps->id.blkptr));
3770:     PetscCall(PetscFree(mumps->id.blkvar));
3771:     // mumps->id.ICNTL(15) = 0;
3772:     mumps->id.nblk = 0;
3773:   }
3774:   PetscFunctionReturn(PETSC_SUCCESS);
3775: }

3777: /*@
3778:   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`

3780:   Not collective, only relevant on the first process of the MPI communicator

3782:   Input Parameters:
3783: + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3784: . nblk   - the number of blocks
3785: . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
3786: - blkptr - array of size `nblk + 1` whose first entry is 1, storing the prefix sum of the block sizes

3788:   Level: advanced
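
  Example Usage:
  A sketch, assuming the `N` variables of `F` are to be grouped into consecutive blocks of size 2 (so `blkvar` may be NULL) and
  that `N` is even; `blkptr` follows the 1-based MUMPS convention
.vb
  PetscInt nblk = N / 2, *blkptr;
  PetscCall(PetscMalloc1(nblk + 1, &blkptr));
  for (PetscInt i = 0; i <= nblk; i++) blkptr[i] = 2 * i + 1; // block iblk holds variables blkptr[iblk-1] .. blkptr[iblk]-1
  PetscCall(MatMumpsSetBlk(F, nblk, NULL, blkptr));
  PetscCall(PetscFree(blkptr));
.ve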

3790: .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3791: @*/
3792: PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3793: {
3794:   PetscFunctionBegin;
3796:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3797:   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3798:   PetscFunctionReturn(PETSC_SUCCESS);
3799: }

3801: /*@
3802:   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

3804:   Logically Collective

3806:   Input Parameters:
3807: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3808: - icntl - index of MUMPS parameter array INFO()

3810:   Output Parameter:
3811: . ival - value of MUMPS INFO(icntl)

3813:   Level: beginner

3815: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3816: @*/
3817: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3818: {
3819:   PetscFunctionBegin;
3821:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3822:   PetscAssertPointer(ival, 3);
3823:   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3824:   PetscFunctionReturn(PETSC_SUCCESS);
3825: }

3827: /*@
3828:   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

3830:   Logically Collective

3832:   Input Parameters:
3833: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3834: - icntl - index of MUMPS parameter array INFOG()

3836:   Output Parameter:
3837: . ival - value of MUMPS INFOG(icntl)

3839:   Level: beginner
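
  Example Usage:
  A minimal sketch, assuming `F` has already been factored numerically, that retrieves the number of null pivots encountered
.vb
  PetscInt nnull;
  PetscCall(MatMumpsGetInfog(F, 28, &nnull)); // INFOG(28)
.ve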

3841: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3842: @*/
3843: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3844: {
3845:   PetscFunctionBegin;
3847:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3848:   PetscAssertPointer(ival, 3);
3849:   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3850:   PetscFunctionReturn(PETSC_SUCCESS);
3851: }

3853: /*@
3854:   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

3856:   Logically Collective

3858:   Input Parameters:
3859: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3860: - icntl - index of MUMPS parameter array RINFO()

3862:   Output Parameter:
3863: . val - value of MUMPS RINFO(icntl)

3865:   Level: beginner

3867: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3868: @*/
3869: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3870: {
3871:   PetscFunctionBegin;
3873:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3874:   PetscAssertPointer(val, 3);
3875:   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3876:   PetscFunctionReturn(PETSC_SUCCESS);
3877: }

3879: /*@
3880:   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

3882:   Logically Collective

3884:   Input Parameters:
3885: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3886: - icntl - index of MUMPS parameter array RINFOG()

3888:   Output Parameter:
3889: . val - value of MUMPS RINFOG(icntl)

3891:   Level: beginner
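
  Example Usage:
  A sketch retrieving the determinant, assuming `F` was factored with `-mat_mumps_icntl_33 1` (or `MatMumpsSetIcntl(F, 33, 1)`)
  so that the determinant (RINFOG(12), RINFOG(13)) * 2^INFOG(34) is available
.vb
  PetscReal mantissa_r, mantissa_i;
  PetscInt  exponent;
  PetscCall(MatMumpsGetRinfog(F, 12, &mantissa_r));
  PetscCall(MatMumpsGetRinfog(F, 13, &mantissa_i)); // zero in real arithmetic
  PetscCall(MatMumpsGetInfog(F, 34, &exponent));
.ve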

3893: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3894: @*/
3895: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3896: {
3897:   PetscFunctionBegin;
3899:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3900:   PetscAssertPointer(val, 3);
3901:   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3902:   PetscFunctionReturn(PETSC_SUCCESS);
3903: }

3905: /*@
3906:   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

3908:   Logically Collective

3910:   Input Parameter:
3911: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3913:   Output Parameters:
3914: + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3915: - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3916:           for freeing it.

3918:   Level: beginner
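
  Example Usage:
  A sketch, assuming null pivot detection was enabled with `MatMumpsSetIcntl(F, 24, 1)` before the numerical factorization
.vb
  PetscInt nnull, *rows;
  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows)); // nnull is nonzero only on MPI rank 0
  // ... use rows[0..nnull-1] ...
  PetscCall(PetscFree(rows));
.ve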

3920: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3921: @*/
3922: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3923: {
3924:   PetscFunctionBegin;
3926:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3927:   PetscAssertPointer(size, 2);
3928:   PetscAssertPointer(array, 3);
3929:   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3930:   PetscFunctionReturn(PETSC_SUCCESS);
3931: }

3933: /*MC
3934:   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3935:   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

3937:   Works with `MATAIJ` and `MATSBAIJ` matrices

3939:   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

3941:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3942:   See details below.

3944:   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

3946:   Options Database Keys:
3947: +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3948: .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3949: .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3950: .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3951: .  -mat_mumps_icntl_6  - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
3952: .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3953:                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3954: .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3955: .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3956: .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3957: .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3958: .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3959: .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3960: .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3961: .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3962: .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3963: .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3964: .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
3965: .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3966: .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3967: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3968: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3969: .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3970: .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3971: .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3972: .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3973: .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3974: .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3975: .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3976: .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3977: .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3978: .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3979: .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3980: .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3981: .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3982: .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3983: .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3984: .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3985: .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3986: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3987:                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

3989:   Level: beginner

3991:   Notes:
3992:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3993:   error if the matrix is Hermitian.

3995:   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3996:   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

3998:   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3999:   the failure with
4000: .vb
4001:           KSPGetPC(ksp,&pc);
4002:           PCFactorGetMatrix(pc,&mat);
4003:           MatMumpsGetInfo(mat,....);
4004:           MatMumpsGetInfog(mat,....); etc.
4005: .ve
4006:   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
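
  To select MUMPS programmatically instead of via the options database, a sketch is
.vb
          KSPGetPC(ksp,&pc);
          PCSetType(pc,PCLU);
          PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
          PCFactorSetUpMatSolverType(pc); /* create the factor matrix */
          PCFactorGetMatrix(pc,&F);
          MatMumpsSetIcntl(F,24,1);       /* MUMPS options can then be set on F before KSPSetUp()/PCSetUp() */
.ve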

4008:   MUMPS provides 64-bit integer support in two build modes:
4009:   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
4010:   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

4012:   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
4013:   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
4014:   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
4015:   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

4017:   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

4019:   Two modes to run MUMPS/PETSc with OpenMP
4020: .vb
4021:    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
4022:    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
4023: .ve

4025: .vb
4026:    Use `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
4027:    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
4028: .ve

4030:    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
4031:    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
4032:    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
4033:    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
4034:    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

4036:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
4037:    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
4038:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
4039:    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free CPUs assigned to slaves (which might be set
4040:    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
4041:    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets.
4042:    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores,
4043:    then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of its cores on socket 0 and half
4044:    on socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not arise.
4045:    Therefore, when you use `-mat_mumps_use_omp_threads`, keep an eye on your MPI rank mapping and CPU binding.
4046:    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
4047:    examine the mapping result.

4049:    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
4050:    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
4051:    calls `omp_set_num_threads`(m) internally before calling MUMPS.

4053:    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

4055: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
4056: M*/

4058: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
4059: {
4060:   PetscFunctionBegin;
4061:   *type = MATSOLVERMUMPS;
4062:   PetscFunctionReturn(PETSC_SUCCESS);
4063: }

4065: /* MatGetFactor for Seq and MPI AIJ matrices */
4066: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
4067: {
4068:   Mat         B;
4069:   Mat_MUMPS  *mumps;
4070:   PetscBool   isSeqAIJ, isDiag, isDense;
4071:   PetscMPIInt size;

4073:   PetscFunctionBegin;
4074: #if defined(PETSC_USE_COMPLEX)
4075:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4076:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4077:     *F = NULL;
4078:     PetscFunctionReturn(PETSC_SUCCESS);
4079:   }
4080: #endif
4081:   /* Create the factorization matrix */
4082:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4083:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4084:   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4085:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4086:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4087:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4088:   PetscCall(MatSetUp(B));

4090:   PetscCall(PetscNew(&mumps));

4092:   B->ops->view    = MatView_MUMPS;
4093:   B->ops->getinfo = MatGetInfo_MUMPS;

4095:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4096:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4097:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4098:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4099:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4100:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4101:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4102:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4103:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4104:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4105:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4106:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4107:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4108:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4109:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4111:   if (ftype == MAT_FACTOR_LU) {
4112:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4113:     B->factortype            = MAT_FACTOR_LU;
4114:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4115:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4116:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4117:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4118:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4119:     mumps->sym = 0;
4120:   } else {
4121:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4122:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4123:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4124:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4125:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4126:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4127:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4128: #if defined(PETSC_USE_COMPLEX)
4129:     mumps->sym = 2;
4130: #else
4131:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4132:     else mumps->sym = 2;
4133: #endif
4134:   }

4136:   /* set solvertype */
4137:   PetscCall(PetscFree(B->solvertype));
4138:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4139:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4140:   if (size == 1) {
4141:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4142:     B->canuseordering = PETSC_TRUE;
4143:   }
4144:   B->ops->destroy = MatDestroy_MUMPS;
4145:   B->data         = (void *)mumps;

4147:   *F               = B;
4148:   mumps->id.job    = JOB_NULL;
4149:   mumps->ICNTL_pre = NULL;
4150:   mumps->CNTL_pre  = NULL;
4151:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4152:   PetscFunctionReturn(PETSC_SUCCESS);
4153: }

4155: /* MatGetFactor for Seq and MPI SBAIJ matrices */
4156: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4157: {
4158:   Mat         B;
4159:   Mat_MUMPS  *mumps;
4160:   PetscBool   isSeqSBAIJ;
4161:   PetscMPIInt size;

4163:   PetscFunctionBegin;
4164: #if defined(PETSC_USE_COMPLEX)
4165:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4166:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4167:     *F = NULL;
4168:     PetscFunctionReturn(PETSC_SUCCESS);
4169:   }
4170: #endif
4171:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4172:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4173:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4174:   PetscCall(MatSetUp(B));

4176:   PetscCall(PetscNew(&mumps));
4177:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4178:   if (isSeqSBAIJ) {
4179:     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4180:   } else {
4181:     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4182:   }

4184:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4185:   B->ops->view                   = MatView_MUMPS;
4186:   B->ops->getinfo                = MatGetInfo_MUMPS;

4188:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4189:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4190:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4191:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4192:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4193:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4194:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4195:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4196:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4197:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4198:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4199:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4200:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4201:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4202:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4204:   B->factortype = MAT_FACTOR_CHOLESKY;
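        /* MUMPS SYM parameter: 1 = symmetric positive definite, 2 = general symmetric; for complex scalars SYM = 2 is always used here (Hermitian Cholesky was rejected above) */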
4205: #if defined(PETSC_USE_COMPLEX)
4206:   mumps->sym = 2;
4207: #else
4208:   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4209:   else mumps->sym = 2;
4210: #endif

4212:   /* set solvertype */
4213:   PetscCall(PetscFree(B->solvertype));
4214:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4215:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4216:   if (size == 1) {
4217:     /* MUMPS option -mat_mumps_icntl_7 1 is set automatically when a PETSc ordering is passed to the symbolic factorization */
4218:     B->canuseordering = PETSC_TRUE;
4219:   }
4220:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4221:   B->ops->destroy = MatDestroy_MUMPS;
4222:   B->data         = (void *)mumps;

4224:   *F               = B;
4225:   mumps->id.job    = JOB_NULL;
4226:   mumps->ICNTL_pre = NULL;
4227:   mumps->CNTL_pre  = NULL;
4228:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4229:   PetscFunctionReturn(PETSC_SUCCESS);
4230: }

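      /* MatGetFactor for Seq and MPI BAIJ matrices; only MAT_FACTOR_LU is supported, since MUMPS Cholesky through PETSc requires SBAIJ or AIJ input */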
4232: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4233: {
4234:   Mat         B;
4235:   Mat_MUMPS  *mumps;
4236:   PetscBool   isSeqBAIJ;
4237:   PetscMPIInt size;

4239:   PetscFunctionBegin;
4240:   /* Create the factorization matrix */
4241:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4242:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4243:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4244:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4245:   PetscCall(MatSetUp(B));

4247:   PetscCall(PetscNew(&mumps));
4248:   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use an SBAIJ or AIJ matrix instead");
4249:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4250:   B->factortype            = MAT_FACTOR_LU;
4251:   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4252:   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4253:   mumps->sym = 0;
4254:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4256:   B->ops->view    = MatView_MUMPS;
4257:   B->ops->getinfo = MatGetInfo_MUMPS;

4259:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4260:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4261:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4262:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4263:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4264:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4265:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4266:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4267:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4268:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4269:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4270:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4271:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4272:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4273:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4275:   /* set solvertype */
4276:   PetscCall(PetscFree(B->solvertype));
4277:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4278:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4279:   if (size == 1) {
4280:     /* MUMPS option -mat_mumps_icntl_7 1 is set automatically when a PETSc ordering is passed to the symbolic factorization */
4281:     B->canuseordering = PETSC_TRUE;
4282:   }
4283:   B->ops->destroy = MatDestroy_MUMPS;
4284:   B->data         = (void *)mumps;

4286:   *F               = B;
4287:   mumps->id.job    = JOB_NULL;
4288:   mumps->ICNTL_pre = NULL;
4289:   mumps->CNTL_pre  = NULL;
4290:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4291:   PetscFunctionReturn(PETSC_SUCCESS);
4292: }

4294: /* MatGetFactor for SELL matrices (currently only sequential SELL with MAT_FACTOR_LU is supported) */
4295: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4296: {
4297:   Mat         B;
4298:   Mat_MUMPS  *mumps;
4299:   PetscBool   isSeqSELL;
4300:   PetscMPIInt size;

4302:   PetscFunctionBegin;
4303:   /* Create the factorization matrix */
4304:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4305:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4306:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4307:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4308:   PetscCall(MatSetUp(B));

4310:   PetscCall(PetscNew(&mumps));

4312:   B->ops->view    = MatView_MUMPS;
4313:   B->ops->getinfo = MatGetInfo_MUMPS;

4315:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4316:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4317:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4318:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4319:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4320:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4321:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4322:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4323:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4324:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4325:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4326:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

4328:   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_FACTOR_LU is supported for SELL matrices with MUMPS");
4329:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4330:   B->factortype            = MAT_FACTOR_LU;
4331:   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only sequential SELL matrices are supported with MUMPS");
4332:   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4333:   mumps->sym              = 0;
4334:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4336:   /* set solvertype */
4337:   PetscCall(PetscFree(B->solvertype));
4338:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4339:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4340:   if (size == 1) {
4341:     /* MUMPS option -mat_mumps_icntl_7 1 is set automatically when a PETSc ordering is passed to the symbolic factorization */
4342:     B->canuseordering = PETSC_TRUE;
4343:   }
4344:   B->ops->destroy = MatDestroy_MUMPS;
4345:   B->data         = (void *)mumps;

4347:   *F               = B;
4348:   mumps->id.job    = JOB_NULL;
4349:   mumps->ICNTL_pre = NULL;
4350:   mumps->CNTL_pre  = NULL;
4351:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4352:   PetscFunctionReturn(PETSC_SUCCESS);
4353: }

4355: /* MatGetFactor for MATNEST matrices */
4356: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4357: {
4358:   Mat         B, **mats;
4359:   Mat_MUMPS  *mumps;
4360:   PetscInt    nr, nc;
4361:   PetscMPIInt size;
4362:   PetscBool   flg = PETSC_TRUE;

4364:   PetscFunctionBegin;
4365: #if defined(PETSC_USE_COMPLEX)
4366:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4367:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4368:     *F = NULL;
4369:     PetscFunctionReturn(PETSC_SUCCESS);
4370:   }
4371: #endif

4373:   /* Return a NULL factor if the nest does not satisfy the requirements checked below */
4374:   *F = NULL;
4375:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4376:   if (ftype == MAT_FACTOR_CHOLESKY) {
4377:     IS       *rows, *cols;
4378:     PetscInt *m, *M;

4380:     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4381:     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4382:     PetscCall(MatNestGetISs(A, rows, cols));
4383:     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4384:     if (!flg) {
4385:       PetscCall(PetscFree2(rows, cols));
4386:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4387:       PetscFunctionReturn(PETSC_SUCCESS);
4388:     }
4389:     PetscCall(PetscMalloc2(nr, &m, nr, &M));
4390:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
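          /* For Cholesky, the row index ranges of different blocks must not intersect; flag any pair of blocks whose [min, max] ranges overlap */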
4391:     for (PetscInt r = 0; flg && r < nr; r++)
4392:       for (PetscInt k = r + 1; flg && k < nr; k++)
4393:         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4394:     PetscCall(PetscFree2(m, M));
4395:     PetscCall(PetscFree2(rows, cols));
4396:     if (!flg) {
4397:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4398:       PetscFunctionReturn(PETSC_SUCCESS);
4399:     }
4400:   }

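        /* Check that each sub-block of the nest has a matrix type that the nest-to-triples conversion used below can handle */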
4402:   for (PetscInt r = 0; r < nr; r++) {
4403:     for (PetscInt c = 0; c < nc; c++) {
4404:       Mat       sub = mats[r][c];
4405:       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

4407:       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4408:       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4409:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4410:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4411:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4412:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4413:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4414:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4415:       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4416:       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4417:       if (ftype == MAT_FACTOR_CHOLESKY) {
4418:         if (r == c) {
4419:           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4420:             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4421:             flg = PETSC_FALSE;
4422:           }
4423:         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4424:           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4425:           flg = PETSC_FALSE;
4426:         }
4427:       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4428:         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4429:         flg = PETSC_FALSE;
4430:       }
4431:     }
4432:   }
4433:   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

4435:   /* Create the factorization matrix */
4436:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4437:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4438:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4439:   PetscCall(MatSetUp(B));

4441:   PetscCall(PetscNew(&mumps));

4443:   B->ops->view    = MatView_MUMPS;
4444:   B->ops->getinfo = MatGetInfo_MUMPS;

4446:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4447:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4448:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4449:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4450:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4451:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4452:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4453:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4454:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4455:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4456:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4457:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4458:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4459:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4460:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4462:   if (ftype == MAT_FACTOR_LU) {
4463:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4464:     B->factortype            = MAT_FACTOR_LU;
4465:     mumps->sym               = 0;
4466:   } else {
4467:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4468:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4469: #if defined(PETSC_USE_COMPLEX)
4470:     mumps->sym = 2;
4471: #else
4472:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4473:     else mumps->sym = 2;
4474: #endif
4475:   }
4476:   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4477:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

4479:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4480:   if (size == 1) {
4481:     /* MUMPS option -mat_mumps_icntl_7 1 is set automatically when a PETSc ordering is passed to the symbolic factorization */
4482:     B->canuseordering = PETSC_TRUE;
4483:   }

4485:   /* set solvertype */
4486:   PetscCall(PetscFree(B->solvertype));
4487:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4488:   B->ops->destroy = MatDestroy_MUMPS;
4489:   B->data         = (void *)mumps;

4491:   *F               = B;
4492:   mumps->id.job    = JOB_NULL;
4493:   mumps->ICNTL_pre = NULL;
4494:   mumps->CNTL_pre  = NULL;
4495:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4496:   PetscFunctionReturn(PETSC_SUCCESS);
4497: }

4499: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4500: {
4501:   PetscFunctionBegin;
4502:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4503:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4504:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4505:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4506:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4507:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4508:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4509:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4510:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4511:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4512:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4513:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4514:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4515:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4516:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4517:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4518:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4519:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4520:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4521:   PetscFunctionReturn(PETSC_SUCCESS);
4522: }
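
      /*
         Illustration only, not part of the PETSc source: a minimal sketch of how the solver
         registered above is typically used once MatSolverTypeRegister_MUMPS() has run. It
         assumes an assembled AIJ matrix A and conforming vectors b and x, and omits the
         PetscCall() error checking used in real code; the ICNTL value shown is only an
         example (ICNTL(7) chooses the sequential ordering).

           Mat           F;
           IS            rperm, cperm;
           MatFactorInfo info;

           MatFactorInfoInitialize(&info);
           MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
           MatMumpsSetIcntl(F, 7, 2);
           MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm);
           MatLUFactorSymbolic(F, A, rperm, cperm, &info);
           MatLUFactorNumeric(F, A, &info);
           MatSolve(F, b, x);
           ISDestroy(&rperm);
           ISDestroy(&cperm);
           MatDestroy(&F);

         More commonly the factorization is selected at run time through KSP/PC, e.g.
         -pc_type lu -pc_factor_mat_solver_type mumps.
      */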