/* Source file: imumps.c */

  1: /*
  2:     Provides an interface to the MUMPS sparse solver
  3: */
  4: #include <petscpkg_version.h>
  5: #include <petscsf.h>
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
  8: #include <../src/mat/impls/sell/mpi/mpisell.h>
  9: #include <petsc/private/vecimpl.h>

 11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

 13: EXTERN_C_BEGIN
 14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
 15:   #include <cmumps_c.h>
 16:   #include <zmumps_c.h>
 17:   #include <smumps_c.h>
 18:   #include <dmumps_c.h>
 19: #else
 20:   #if defined(PETSC_USE_COMPLEX)
 21:     #if defined(PETSC_USE_REAL_SINGLE)
 22:       #include <cmumps_c.h>
 23:       #define MUMPS_c     cmumps_c
 24:       #define MumpsScalar CMUMPS_COMPLEX
 25:     #else
 26:       #include <zmumps_c.h>
 27:       #define MUMPS_c     zmumps_c
 28:       #define MumpsScalar ZMUMPS_COMPLEX
 29:     #endif
 30:   #else
 31:     #if defined(PETSC_USE_REAL_SINGLE)
 32:       #include <smumps_c.h>
 33:       #define MUMPS_c     smumps_c
 34:       #define MumpsScalar SMUMPS_REAL
 35:     #else
 36:       #include <dmumps_c.h>
 37:       #define MUMPS_c     dmumps_c
 38:       #define MumpsScalar DMUMPS_REAL
 39:     #endif
 40:   #endif
 41: #endif
 42: #if defined(PETSC_USE_COMPLEX)
 43:   #if defined(PETSC_USE_REAL_SINGLE)
 44:     #define MUMPS_STRUC_C CMUMPS_STRUC_C
 45:   #else
 46:     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
 47:   #endif
 48: #else
 49:   #if defined(PETSC_USE_REAL_SINGLE)
 50:     #define MUMPS_STRUC_C SMUMPS_STRUC_C
 51:   #else
 52:     #define MUMPS_STRUC_C DMUMPS_STRUC_C
 53:   #endif
 54: #endif
 55: EXTERN_C_END

/* MUMPS job codes assigned to id.job before each call (see the MUMPS users' guide) */
#define JOB_INIT         -1 /* initialize an instance */
#define JOB_NULL         0  /* no-op */
#define JOB_FACTSYMBOLIC 1  /* analysis (symbolic factorization) */
#define JOB_FACTNUMERIC  2  /* numeric factorization */
#define JOB_SOLVE        3  /* solve */
#define JOB_END          -2 /* terminate an instance */

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

/* MPI datatype and value range matching PetscMUMPSInt (a 32-bit int, since full 64-bit MUMPS is rejected above) */
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

 84: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
 85: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
 86: {
 87:   PetscFunctionBegin;
 88: #if PetscDefined(USE_64BIT_INDICES)
 89:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
 90: #endif
 91:   *b = (PetscMUMPSInt)a;
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }

 95: /* Put these utility routines here since they are only used in this file */
 96: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
 97: {
 98:   PetscInt  myval;
 99:   PetscBool myset;

101:   PetscFunctionBegin;
102:   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103:   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104:   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105:   if (set) *set = myset;
106:   PetscFunctionReturn(PETSC_SUCCESS);
107: }
108: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

// An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
//
// With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
// Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
// arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
//
// Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
// Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;

// Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
// Allocate a zeroed inner struct of the given concrete type and wire its fixed-size arrays
// (icntl, cntl, info/infog, rinfo/rinfog, ooc_*) into the outer XMUMPS_STRUC_C, so shared
// code can read/write them without knowing the concrete type.
#define AllocateInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)

// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner.
// The concrete type is chosen by <precision> and whether PETSc is configured with complex scalars.
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
  else AllocateInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
  else AllocateInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  /* without mixed-precision support there is only one concrete type, selected at configure time */
  AllocateInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Free the MumpsScalar buffers held by the inner struct. Only invoked (see MatMumpsFreeInternalID())
// when MUMPS ran in a different precision than PetscScalar, i.e., when these buffers were separately
// allocated companions rather than aliases of PetscScalar arrays.
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)

// Free the inner [SDCZ]MUMPS_STRUC_C (if any), including the companion MumpsScalar buffers it owns
// when it was allocated in a precision different from PetscScalar.
static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

251: // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least  MumpsScalars in the given  and return the address at .
252: //  indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253: // (For bravity, we use  for array address and  for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254: // If  already points to a buffer/array, on input  should be its length. Note the buffer might be freed if it is not big enough for this request.
255: //
256: // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257: // 1) If they are different, a separate array will be made and its length and address will be provided at  and  on output.
258: // 2) Otherwise,  will be returned in , and  will be zero on output.
259: //
260: //
261: //   Input parameters:
262: // + convert   - whether to do PetscScalar to MumpsScalar conversion
263: // . n         - length of the PetscScalar array
264: // . pa        - [n]], points to the PetscScalar array
265: // . precision - precision of MumpsScalar
266: // . m         - on input, length of an existing MumpsScalar array  if any, otherwise *m is just zero.
267: // - ma        - on input, an existing MumpsScalar array if any.
268: //
269: //   Output parameters:
270: // + m  - length of the MumpsScalar buffer at  if MumpsScalar is different from PetscScalar, otherwise 0
271: // . ma - the MumpsScalar array, which could be an alias of  when the two types are the same.
272: //
273: //   Note:
274: //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276: {
277:   PetscFunctionBegin;
278: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279:   const PetscPrecision mumps_precision = precision;
280:   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
281:   #if defined(PETSC_USE_COMPLEX)
282:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284:       if (*m < n) {
285:         PetscCall(PetscFree(*ma));
286:         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287:         *m = n;
288:       }
289:       if (convert) {
290:         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291:         for (PetscCount i = 0; i < n; i++) {
292:           b[i].r = PetscRealPart(pa[i]);
293:           b[i].i = PetscImaginaryPart(pa[i]);
294:         }
295:       }
296:     } else {
297:       if (*m < n) {
298:         PetscCall(PetscFree(*ma));
299:         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300:         *m = n;
301:       }
302:       if (convert) {
303:         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304:         for (PetscCount i = 0; i < n; i++) {
305:           b[i].r = PetscRealPart(pa[i]);
306:           b[i].i = PetscImaginaryPart(pa[i]);
307:         }
308:       }
309:     }
310:   }
311:   #else
312:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314:       if (*m < n) {
315:         PetscCall(PetscFree(*ma));
316:         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317:         *m = n;
318:       }
319:       if (convert) {
320:         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322:       }
323:     } else {
324:       if (*m < n) {
325:         PetscCall(PetscFree(*ma));
326:         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327:         *m = n;
328:       }
329:       if (convert) {
330:         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332:       }
333:     }
334:   }
335:   #endif
336:   else
337: #endif
338:   {
339:     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340:     *ma = (void *)pa;                       // same precision, make them alias
341:     *m  = 0;
342:   }
343:   PetscFunctionReturn(PETSC_SUCCESS);
344: }

// Cast a MumpsScalar array <ma> of length <n> in <mumps_precision> to a PetscScalar array at address <pa>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    /* view the n complex values as 2n interleaved reals and convert component-wise */
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      PetscReal         *a = (PetscReal *)pa;
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    } else {
      PetscReal         *a = (PetscReal *)pa;
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    } else {
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
380: // Cast a PetscScalar array  to a MumpsScalar array in the given  at address .
381: //
382: // 1) If the two types are different, cast array elements.
383: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385: {
386:   PetscFunctionBegin;
387: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
389:   #if defined(PETSC_USE_COMPLEX)
390:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
391:       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392:       for (PetscCount i = 0; i < n; i++) {
393:         b[i].r = PetscRealPart(pa[i]);
394:         b[i].i = PetscImaginaryPart(pa[i]);
395:       }
396:     } else {
397:       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398:       for (PetscCount i = 0; i < n; i++) {
399:         b[i].r = PetscRealPart(pa[i]);
400:         b[i].i = PetscImaginaryPart(pa[i]);
401:       }
402:     }
403:   #else
404:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
405:       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407:     } else {
408:       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410:     }
411:   #endif
412:   } else
413: #endif
414:     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415:   PetscFunctionReturn(PETSC_SUCCESS);
416: }

418: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419: {
420:   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421: }

// Copy the scalar fields and array pointers from the outer (abstract) struct into the concrete inner
// struct right before calling MUMPS; MumpsScalar buffers are cast from void * to the inner's type.
// (The fixed-size arrays icntl/cntl/info/... need no copy: they were linked at AllocateInternalID() time.)
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)

// After calling MUMPS, copy back the pointers that MUMPS itself allocates (see XMUMPS_STRUC_C:
// pivnul_list and mapping are "allocated by MUMPS!")
#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)

// Entry for PETSc to call mumps
// Dispatch to the precision-specific MUMPS entry point: sync outer -> inner (PreMumpsCall()),
// call MUMPS, then copy back the MUMPS-owned output pointers (PostMumpsCall()).
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  /* single-precision-config build: MUMPS_c/MumpsScalar were fixed at compile time */
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* macros s.t. indices match MUMPS documentation (MUMPS arrays are documented 1-based) */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays (1-based index I, as in the MUMPS documentation). The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry (1-based, as in the MUMPS documentation) of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on.
   FP traps are disabled around the call in all cases since MUMPS is an external package. */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
        MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* PETSc-side context of a MUMPS factorization; stored in the factor matrix's data field
   (see e.g. MatMumpsSolveSchur_Private() below) */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id; // the abstract MUMPS instance, see XMUMPS_STRUC_C above

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 64-bit PetscInt, the values are copied into the reusable buffers mumps->ia_alloc/ja_alloc
   (grown on demand); with 32-bit PetscInt the input arrays are returned as-is.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (PetscInt i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (PetscInt i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the Schur-complement related buffers and reset the Schur fields of the id */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // guard: sometimes the inner id is not yet built (id.icntl aliases the inner's array)
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F)); /* no-op if S is already factored/inverted */
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  /* bring MUMPS's reduced rhs (id.redrhs, possibly stored in a different precision) into the PetscScalar buffer mumps->redrhs */
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  /* B wraps mumps->redrhs (size_schur x nrhs) without copying */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); /* match S so the solve can run on the same backend (CPU/GPU) */
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also wraps mumps->redrhs: the triangular solves write the solution in place */
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(S), so the solve is a dense multiply; it cannot be done in place, hence a scratch buffer */
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { /* (re)allocate scratch only when it grew */
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); /* copy the product back into mumps->redrhs via B */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drive the two halves of a MUMPS Schur-complement solve: with expansion == PETSC_FALSE
   set up the reduced-rhs storage before the condensation solve; with expansion == PETSC_TRUE
   solve the Schur system ourselves and let MUMPS expand the solution (ICNTL(26) = 2) */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { /* (re)allocate only when missing or too small */
      mumps->id.lredrhs = mumps->id.size_schur;                /* leading dimension of the reduced rhs */
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); /* redrhs_len != 0 means id.redrhs is a separate (different-precision) buffer */
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      /* attach a MUMPS-precision view (or copy) of mumps->redrhs as id.redrhs */
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

768: /*
769:   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

771:   input:
772:     A       - matrix in aij,baij or sbaij format
773:     shift   - 0: C style output triple; 1: Fortran style output triple.
774:     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
775:               MAT_REUSE_MATRIX:   only the values in v array are updated
776:   output:
777:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
778:     r, c, v - row and col index, matrix values (matrix triples)

780:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
781:   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
782:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

784:  */

786: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
787: {
788:   const PetscScalar *av;
789:   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
790:   PetscCount         nz, rnz, k;
791:   PetscMUMPSInt     *row, *col;
792:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

794:   PetscFunctionBegin;
795:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
796:   if (reuse == MAT_INITIAL_MATRIX) {
797:     nz = aa->nz;
798:     ai = aa->i;
799:     aj = aa->j;
800:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
801:     for (PetscCount i = k = 0; i < M; i++) {
802:       rnz = ai[i + 1] - ai[i];
803:       ajj = aj + ai[i];
804:       for (PetscCount j = 0; j < rnz; j++) {
805:         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
806:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
807:         k++;
808:       }
809:     }
810:     mumps->val = (PetscScalar *)av;
811:     mumps->irn = row;
812:     mumps->jcn = col;
813:     mumps->nnz = nz;
814:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
815:   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
816:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
817:   PetscFunctionReturn(PETSC_SUCCESS);
818: }

820: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
821: {
822:   PetscCount     nz, i, j, k, r;
823:   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
824:   PetscMUMPSInt *row, *col;

826:   PetscFunctionBegin;
827:   nz = a->sliidx[a->totalslices];
828:   if (reuse == MAT_INITIAL_MATRIX) {
829:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
830:     for (i = k = 0; i < a->totalslices; i++) {
831:       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
832:     }
833:     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
834:     mumps->irn = row;
835:     mumps->jcn = col;
836:     mumps->nnz = nz;
837:     mumps->val = a->val;
838:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
839:   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
840:   PetscFunctionReturn(PETSC_SUCCESS);
841: }

843: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
844: {
845:   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
846:   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
847:   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
848:   PetscInt        bs;
849:   PetscMUMPSInt  *row, *col;

851:   PetscFunctionBegin;
852:   if (reuse == MAT_INITIAL_MATRIX) {
853:     PetscCall(MatGetBlockSize(A, &bs));
854:     M  = A->rmap->N / bs;
855:     ai = aa->i;
856:     aj = aa->j;
857:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
858:     for (i = 0; i < M; i++) {
859:       ajj = aj + ai[i];
860:       rnz = ai[i + 1] - ai[i];
861:       for (k = 0; k < rnz; k++) {
862:         for (j = 0; j < bs; j++) {
863:           for (m = 0; m < bs; m++) {
864:             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
865:             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
866:             idx++;
867:           }
868:         }
869:       }
870:     }
871:     mumps->irn = row;
872:     mumps->jcn = col;
873:     mumps->nnz = nz;
874:     mumps->val = aa->a;
875:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
876:   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
877:   PetscFunctionReturn(PETSC_SUCCESS);
878: }

880: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
881: {
882:   const PetscInt *ai, *aj, *ajj;
883:   PetscInt        bs;
884:   PetscCount      nz, rnz, i, j, k, m;
885:   PetscMUMPSInt  *row, *col;
886:   PetscScalar    *val;
887:   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
888:   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
889: #if defined(PETSC_USE_COMPLEX)
890:   PetscBool isset, hermitian;
891: #endif

893:   PetscFunctionBegin;
894: #if defined(PETSC_USE_COMPLEX)
895:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
896:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
897: #endif
898:   ai = aa->i;
899:   aj = aa->j;
900:   PetscCall(MatGetBlockSize(A, &bs));
901:   if (reuse == MAT_INITIAL_MATRIX) {
902:     const PetscCount alloc_size = aa->nz * bs2;

904:     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
905:     if (bs > 1) {
906:       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
907:       mumps->val = mumps->val_alloc;
908:     } else {
909:       mumps->val = aa->a;
910:     }
911:     mumps->irn = row;
912:     mumps->jcn = col;
913:   } else {
914:     row = mumps->irn;
915:     col = mumps->jcn;
916:   }
917:   val = mumps->val;

919:   nz = 0;
920:   if (bs > 1) {
921:     for (i = 0; i < mbs; i++) {
922:       rnz = ai[i + 1] - ai[i];
923:       ajj = aj + ai[i];
924:       for (j = 0; j < rnz; j++) {
925:         for (k = 0; k < bs; k++) {
926:           for (m = 0; m < bs; m++) {
927:             if (ajj[j] > i || k >= m) {
928:               if (reuse == MAT_INITIAL_MATRIX) {
929:                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
930:                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
931:               }
932:               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
933:             }
934:           }
935:         }
936:       }
937:     }
938:   } else if (reuse == MAT_INITIAL_MATRIX) {
939:     for (i = 0; i < mbs; i++) {
940:       rnz = ai[i + 1] - ai[i];
941:       ajj = aj + ai[i];
942:       for (j = 0; j < rnz; j++) {
943:         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
944:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
945:         nz++;
946:       }
947:     }
948:     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
949:   } else if (mumps->nest_vals)
950:     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
951:   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
952:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
953:   PetscFunctionReturn(PETSC_SUCCESS);
954: }

956: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
957: {
958:   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
959:   PetscCount         nz, rnz, i, j;
960:   const PetscScalar *av, *v1;
961:   PetscScalar       *val;
962:   PetscMUMPSInt     *row, *col;
963:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
964:   PetscBool          diagDense;
965: #if defined(PETSC_USE_COMPLEX)
966:   PetscBool hermitian, isset;
967: #endif

969:   PetscFunctionBegin;
970: #if defined(PETSC_USE_COMPLEX)
971:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
972:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
973: #endif
974:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
975:   ai = aa->i;
976:   aj = aa->j;
977:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
978:   if (reuse == MAT_INITIAL_MATRIX) {
979:     /* count nz in the upper triangular part of A */
980:     nz = 0;
981:     if (!diagDense) {
982:       for (i = 0; i < M; i++) {
983:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
984:           for (j = ai[i]; j < ai[i + 1]; j++) {
985:             if (aj[j] < i) continue;
986:             nz++;
987:           }
988:         } else {
989:           nz += ai[i + 1] - adiag[i];
990:         }
991:       }
992:     } else {
993:       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
994:     }
995:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
996:     PetscCall(PetscMalloc1(nz, &val));
997:     mumps->nnz = nz;
998:     mumps->irn = row;
999:     mumps->jcn = col;
1000:     mumps->val = mumps->val_alloc = val;

1002:     nz = 0;
1003:     if (!diagDense) {
1004:       for (i = 0; i < M; i++) {
1005:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1006:           for (j = ai[i]; j < ai[i + 1]; j++) {
1007:             if (aj[j] < i) continue;
1008:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1009:             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1010:             val[nz] = av[j];
1011:             nz++;
1012:           }
1013:         } else {
1014:           rnz = ai[i + 1] - adiag[i];
1015:           ajj = aj + adiag[i];
1016:           v1  = av + adiag[i];
1017:           for (j = 0; j < rnz; j++) {
1018:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1019:             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1020:             val[nz++] = v1[j];
1021:           }
1022:         }
1023:       }
1024:     } else {
1025:       for (i = 0; i < M; i++) {
1026:         rnz = ai[i + 1] - adiag[i];
1027:         ajj = aj + adiag[i];
1028:         v1  = av + adiag[i];
1029:         for (j = 0; j < rnz; j++) {
1030:           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1031:           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1032:           val[nz++] = v1[j];
1033:         }
1034:       }
1035:     }
1036:   } else {
1037:     nz  = 0;
1038:     val = mumps->val;
1039:     if (!diagDense) {
1040:       for (i = 0; i < M; i++) {
1041:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1042:           for (j = ai[i]; j < ai[i + 1]; j++) {
1043:             if (aj[j] < i) continue;
1044:             val[nz++] = av[j];
1045:           }
1046:         } else {
1047:           rnz = ai[i + 1] - adiag[i];
1048:           v1  = av + adiag[i];
1049:           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1050:         }
1051:       }
1052:     } else {
1053:       for (i = 0; i < M; i++) {
1054:         rnz = ai[i + 1] - adiag[i];
1055:         v1  = av + adiag[i];
1056:         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1057:       }
1058:     }
1059:   }
1060:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1061:   PetscFunctionReturn(PETSC_SUCCESS);
1062: }

1064: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1065: {
1066:   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1067:   PetscInt           bs;
1068:   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1069:   PetscMUMPSInt     *row, *col;
1070:   const PetscScalar *av, *bv, *v1, *v2;
1071:   PetscScalar       *val;
1072:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1073:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1074:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1075:   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1076: #if defined(PETSC_USE_COMPLEX)
1077:   PetscBool hermitian, isset;
1078: #endif

1080:   PetscFunctionBegin;
1081: #if defined(PETSC_USE_COMPLEX)
1082:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1083:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1084: #endif
1085:   PetscCall(MatGetBlockSize(A, &bs));
1086:   rstart = A->rmap->rstart;
1087:   ai     = aa->i;
1088:   aj     = aa->j;
1089:   bi     = bb->i;
1090:   bj     = bb->j;
1091:   av     = aa->a;
1092:   bv     = bb->a;

1094:   garray = mat->garray;

1096:   if (reuse == MAT_INITIAL_MATRIX) {
1097:     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1098:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1099:     PetscCall(PetscMalloc1(nz, &val));
1100:     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1101:     mumps->irn = row;
1102:     mumps->jcn = col;
1103:     mumps->val = mumps->val_alloc = val;
1104:   } else {
1105:     val = mumps->val;
1106:   }

1108:   jj   = 0;
1109:   irow = rstart;
1110:   for (i = 0; i < mbs; i++) {
1111:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1112:     countA = ai[i + 1] - ai[i];
1113:     countB = bi[i + 1] - bi[i];
1114:     bjj    = bj + bi[i];
1115:     v1     = av + ai[i] * bs2;
1116:     v2     = bv + bi[i] * bs2;

1118:     if (bs > 1) {
1119:       /* A-part */
1120:       for (j = 0; j < countA; j++) {
1121:         for (k = 0; k < bs; k++) {
1122:           for (m = 0; m < bs; m++) {
1123:             if (rstart + ajj[j] * bs > irow || k >= m) {
1124:               if (reuse == MAT_INITIAL_MATRIX) {
1125:                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1126:                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1127:               }
1128:               val[jj++] = v1[j * bs2 + m + k * bs];
1129:             }
1130:           }
1131:         }
1132:       }

1134:       /* B-part */
1135:       for (j = 0; j < countB; j++) {
1136:         for (k = 0; k < bs; k++) {
1137:           for (m = 0; m < bs; m++) {
1138:             if (reuse == MAT_INITIAL_MATRIX) {
1139:               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1140:               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1141:             }
1142:             val[jj++] = v2[j * bs2 + m + k * bs];
1143:           }
1144:         }
1145:       }
1146:     } else {
1147:       /* A-part */
1148:       for (j = 0; j < countA; j++) {
1149:         if (reuse == MAT_INITIAL_MATRIX) {
1150:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1151:           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1152:         }
1153:         val[jj++] = v1[j];
1154:       }

1156:       /* B-part */
1157:       for (j = 0; j < countB; j++) {
1158:         if (reuse == MAT_INITIAL_MATRIX) {
1159:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1160:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1161:         }
1162:         val[jj++] = v2[j];
1163:       }
1164:     }
1165:     irow += bs;
1166:   }
1167:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1168:   PetscFunctionReturn(PETSC_SUCCESS);
1169: }

1171: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1172: {
1173:   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1174:   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1175:   PetscMUMPSInt     *row, *col;
1176:   const PetscScalar *av, *bv, *v1, *v2;
1177:   PetscScalar       *val;
1178:   Mat                Ad, Ao;
1179:   Mat_SeqAIJ        *aa;
1180:   Mat_SeqAIJ        *bb;

1182:   PetscFunctionBegin;
1183:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1184:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1185:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1187:   aa = (Mat_SeqAIJ *)Ad->data;
1188:   bb = (Mat_SeqAIJ *)Ao->data;
1189:   ai = aa->i;
1190:   aj = aa->j;
1191:   bi = bb->i;
1192:   bj = bb->j;

1194:   rstart = A->rmap->rstart;
1195:   cstart = A->cmap->rstart;

1197:   if (reuse == MAT_INITIAL_MATRIX) {
1198:     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1199:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1200:     PetscCall(PetscMalloc1(nz, &val));
1201:     mumps->nnz = nz;
1202:     mumps->irn = row;
1203:     mumps->jcn = col;
1204:     mumps->val = mumps->val_alloc = val;
1205:   } else {
1206:     val = mumps->val;
1207:   }

1209:   jj   = 0;
1210:   irow = rstart;
1211:   for (i = 0; i < m; i++) {
1212:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1213:     countA = ai[i + 1] - ai[i];
1214:     countB = bi[i + 1] - bi[i];
1215:     bjj    = bj + bi[i];
1216:     v1     = av + ai[i];
1217:     v2     = bv + bi[i];

1219:     /* A-part */
1220:     for (j = 0; j < countA; j++) {
1221:       if (reuse == MAT_INITIAL_MATRIX) {
1222:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1223:         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1224:       }
1225:       val[jj++] = v1[j];
1226:     }

1228:     /* B-part */
1229:     for (j = 0; j < countB; j++) {
1230:       if (reuse == MAT_INITIAL_MATRIX) {
1231:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1232:         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1233:       }
1234:       val[jj++] = v2[j];
1235:     }
1236:     irow++;
1237:   }
1238:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1239:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1240:   PetscFunctionReturn(PETSC_SUCCESS);
1241: }

1243: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1244: {
1245:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1246:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1247:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1248:   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1249:   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1250:   const PetscInt     bs2 = mat->bs2;
1251:   PetscInt           bs;
1252:   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1253:   PetscMUMPSInt     *row, *col;
1254:   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1255:   PetscScalar       *val;

1257:   PetscFunctionBegin;
1258:   PetscCall(MatGetBlockSize(A, &bs));
1259:   if (reuse == MAT_INITIAL_MATRIX) {
1260:     nz = bs2 * (aa->nz + bb->nz);
1261:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1262:     PetscCall(PetscMalloc1(nz, &val));
1263:     mumps->nnz = nz;
1264:     mumps->irn = row;
1265:     mumps->jcn = col;
1266:     mumps->val = mumps->val_alloc = val;
1267:   } else {
1268:     val = mumps->val;
1269:   }

1271:   jj   = 0;
1272:   irow = rstart;
1273:   for (i = 0; i < mbs; i++) {
1274:     countA = ai[i + 1] - ai[i];
1275:     countB = bi[i + 1] - bi[i];
1276:     ajj    = aj + ai[i];
1277:     bjj    = bj + bi[i];
1278:     v1     = av + bs2 * ai[i];
1279:     v2     = bv + bs2 * bi[i];

1281:     idx = 0;
1282:     /* A-part */
1283:     for (k = 0; k < countA; k++) {
1284:       for (j = 0; j < bs; j++) {
1285:         for (n = 0; n < bs; n++) {
1286:           if (reuse == MAT_INITIAL_MATRIX) {
1287:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1288:             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1289:           }
1290:           val[jj++] = v1[idx++];
1291:         }
1292:       }
1293:     }

1295:     idx = 0;
1296:     /* B-part */
1297:     for (k = 0; k < countB; k++) {
1298:       for (j = 0; j < bs; j++) {
1299:         for (n = 0; n < bs; n++) {
1300:           if (reuse == MAT_INITIAL_MATRIX) {
1301:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1302:             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1303:           }
1304:           val[jj++] = v2[idx++];
1305:         }
1306:       }
1307:     }
1308:     irow += bs;
1309:   }
1310:   PetscFunctionReturn(PETSC_SUCCESS);
1311: }

1313: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1314: {
1315:   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1316:   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1317:   PetscMUMPSInt     *row, *col;
1318:   const PetscScalar *av, *bv, *v1, *v2;
1319:   PetscScalar       *val;
1320:   Mat                Ad, Ao;
1321:   Mat_SeqAIJ        *aa;
1322:   Mat_SeqAIJ        *bb;
1323: #if defined(PETSC_USE_COMPLEX)
1324:   PetscBool hermitian, isset;
1325: #endif

1327:   PetscFunctionBegin;
1328: #if defined(PETSC_USE_COMPLEX)
1329:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1330:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1331: #endif
1332:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1333:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1334:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1336:   aa = (Mat_SeqAIJ *)Ad->data;
1337:   bb = (Mat_SeqAIJ *)Ao->data;
1338:   ai = aa->i;
1339:   aj = aa->j;
1340:   bi = bb->i;
1341:   bj = bb->j;
1342:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1343:   rstart = A->rmap->rstart;

1345:   if (reuse == MAT_INITIAL_MATRIX) {
1346:     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1347:     nzb = 0; /* num of upper triangular entries in mat->B */
1348:     for (i = 0; i < m; i++) {
1349:       nza += (ai[i + 1] - adiag[i]);
1350:       countB = bi[i + 1] - bi[i];
1351:       bjj    = bj + bi[i];
1352:       for (j = 0; j < countB; j++) {
1353:         if (garray[bjj[j]] > rstart) nzb++;
1354:       }
1355:     }

1357:     nz = nza + nzb; /* total nz of upper triangular part of mat */
1358:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1359:     PetscCall(PetscMalloc1(nz, &val));
1360:     mumps->nnz = nz;
1361:     mumps->irn = row;
1362:     mumps->jcn = col;
1363:     mumps->val = mumps->val_alloc = val;
1364:   } else {
1365:     val = mumps->val;
1366:   }

1368:   jj   = 0;
1369:   irow = rstart;
1370:   for (i = 0; i < m; i++) {
1371:     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1372:     v1     = av + adiag[i];
1373:     countA = ai[i + 1] - adiag[i];
1374:     countB = bi[i + 1] - bi[i];
1375:     bjj    = bj + bi[i];
1376:     v2     = bv + bi[i];

1378:     /* A-part */
1379:     for (j = 0; j < countA; j++) {
1380:       if (reuse == MAT_INITIAL_MATRIX) {
1381:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1382:         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1383:       }
1384:       val[jj++] = v1[j];
1385:     }

1387:     /* B-part */
1388:     for (j = 0; j < countB; j++) {
1389:       if (garray[bjj[j]] > rstart) {
1390:         if (reuse == MAT_INITIAL_MATRIX) {
1391:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1392:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1393:         }
1394:         val[jj++] = v2[j];
1395:       }
1396:     }
1397:     irow++;
1398:   }
1399:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1400:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1401:   PetscFunctionReturn(PETSC_SUCCESS);
1402: }

/* Convert a MATDIAGONAL matrix to MUMPS triples: one (i, i) entry per local row.
   NOTE(review): shift is deliberately PETSC_UNUSED here and indices are emitted without it —
   confirm that callers of this conversion compensate for MUMPS's Fortran indexing elsewhere. */
static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); /* global row index */
      col[i] = row[i];                                            /* diagonal: column equals row */
    }
    mumps->val = (PetscScalar *)av; /* values alias the diagonal vector's array */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a dense matrix to MUMPS triples: all m*N local entries, column by column.
   NOTE(review): shift is deliberately PETSC_UNUSED and column indices start at 0 —
   confirm that callers of this conversion compensate for MUMPS's Fortran indexing elsewhere. */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* first column: global row indices, column index 0 */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    /* remaining columns: same row pattern, column index j */
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* contiguous storage: alias the dense array directly */
    else {
      /* lda > m: compact each column into a contiguous buffer MUMPS can consume */
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v; /* still contiguous and self-owned: just re-alias */
    else {
      /* either padded columns (lda > m) or a nest-allocated buffer: copy column by column */
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
// and its rows and columns permuted.
// Output semantics: *swap toggles once per (Hermitian) transposition peeled off, *conjugate toggles once per Hermitian transposition,
// and (*vshift, *vscale) accumulate the MatShell shift/scaling of each wrapper on the way down.
// NOTE(review): when vshift and vscale are non-NULL, conjugate is dereferenced without a NULL check, so such callers must also pass conjugate
// (all call sites in this file do) — confirm before adding new callers.
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2];
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    /* peel one wrapper layer per iteration until *sub is no longer a (Hermitian) transpose shell */
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        /* s[0] = additive shift, s[1] = multiplicative scaling of this wrapper; any other shell modification is disallowed */
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          /* an odd number of Hermitian transpositions seen so far: accumulate conjugated quantities */
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;                       /* each transposition exchanges the role of rows and columns */
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; /* must toggle AFTER the shift/scale update above: order matters */
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_nest_xaij - assembles a MATNEST matrix into a single global COO triple
  list for MUMPS by converting each sub-block with its dedicated per-type converter and
  remapping the block-local indices to the global (nest) ordering.

  On MAT_INITIAL_MATRIX: two passes over the blocks. The first pass selects a converter per
  block (Cholesky only keeps the upper triangular part, with SBAIJ converters on the diagonal)
  and sums an upper bound on the number of nonzeros; the second pass converts each block,
  maps rows directly through the nest ISs and columns through a PetscSF (columns may be
  off-process), applies any scaling/conjugation coming from virtual-transpose wrappers, and
  concatenates everything into (irns, jcns, vals). On MAT_REUSE_MATRIX: only the values of
  each block are refreshed in place at their recorded offsets (nest_vals_start).
*/
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric factorization: only convert the upper triangular part */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0; /* Mbs counts the global number of blocks for ICNTL(15) (blocked format) */
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* Pass 1: pick a converter for each block and estimate the total/maximum local nnz */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          /* unwrap (Hermitian) transpose shells; only the inner type matters for converter selection */
          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              /* diagonal blocks of a symmetric factorization: emit only the upper triangle (SBAIJ converters) */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              /* strictly upper off-diagonal blocks are converted fully */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    /* scratch buffer to widen PetscMUMPSInt column indices to PetscInt for the SF (64-bit builds only) */
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* Pass 2: convert each block and concatenate its (remapped) triples into the global COO */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub    = mats[r][c];
        const PetscInt *ridx   = rows_idx[r];
        const PetscInt *cidx   = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          /* accumulate the global number of blocks for ICNTL(15) bookkeeping below */
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        /* replace each block-local column index with its global index taken from the nest column IS of the owner */
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    /* ICNTL(15) == 1: build the blkptr/nblk arrays describing the block (compressed) structure on rank 0
       NOTE(review): this assumes each diagonal sub-Mat has a uniform block size bs dividing its local sizes — confirm */
    if (mumps->id.ICNTL(15) == 1) {
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0;

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1; /* 1-based (Fortran) pointer array */
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0; /* all blocks are 1x1, blocked format brings nothing */
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: refresh each block's values in place at its recorded offset; indices are unchanged */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx]; /* point the converter at this block's slice of the concatenated values */
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval; /* restore the pointer to the start of the concatenated values */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDestroy_MUMPS - releases everything owned by the MUMPS factor: scatters, work vectors,
  index/value scratch arrays, the MUMPS instance itself (JOB_END), OpenMP support buffers,
  and the composed function pointers. Order matters: JOB_END must run before the internal
  id is freed, and the MUMPS communicator is released inside the same guard.
*/
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated (OpenMP path) or borrowed from PETSc's comm cache */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   When PETSc's OpenMP support is enabled and several MPI ranks are folded into one MUMPS rank (ompsize > 1),
   the RHS of the non-master ranks is gathered onto the omp master via MPI_Gatherv before being handed to MUMPS. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* one MPI rank per MUMPS rank: the local RHS is used as-is */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        /* translate omp-communicator ranks back to petsc-communicator ranks to find each member's row range */
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        /* gathered data arrives grouped by sender; interleave it so that each RHS column is contiguous over all senders */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSolve_MUMPS - solves A x = b with the previously computed MUMPS factors.

  RHS handling: in parallel either MUMPS's dense distributed RHS (ICNTL(20)=10) or a
  centralized RHS scattered onto rank 0 (ICNTL(20)=0) is used; sequentially the solution
  vector x doubles as the RHS buffer. If a Schur complement was requested, the condensation
  (ICNTL(26)=1) and expansion phases are handled around the JOB_SOLVE call. The distributed
  solution is scattered back to x through a cached VecScatter that is rebuilt whenever
  ICNTL(9) changes (transpose vs non-transpose solves produce different isol_loc contents).
*/
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* a failed factorization flags x and returns early instead of calling MUMPS */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    /* scatter the Schur solution back into the full solution array (listvar_schur is 1-based) */
    for (PetscInt i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (PetscInt i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;               /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    /* convert MUMPS's (possibly mixed-precision) sol_loc back to PetscScalar */
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* INFO(28) is the number of entries in the factors; negative values are in millions */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

2032: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2033: {
2034:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2035:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2037:   PetscFunctionBegin;
2038:   mumps->id.ICNTL(9) = 0;
2039:   PetscCall(MatSolve_MUMPS(A, b, x));
2040:   mumps->id.ICNTL(9) = value;
2041:   PetscFunctionReturn(PETSC_SUCCESS);
2042: }

/*
  Solve A X = B with multiple right-hand sides using a MUMPS factorization of A.

  Input:
    A - the factored matrix (data is a Mat_MUMPS)
    B - the right-hand-side matrix; either MATDENSE, or a MATTRANSPOSEVIRTUAL wrapping an AIJ
        matrix (i.e. B holds the transpose of the actual RHS, since MUMPS wants sparse RHS in
        compressed-COLUMN storage)
  Output:
    X - MATDENSE solution matrix (must differ from a sparse B)

  Sequential runs are handled in an early-return fast path; in parallel the RHS is either
  distributed (ICNTL(20)=10) or centralized on rank 0 (ICNTL(20)=0), and the MUMPS
  distributed solution (sol_loc/isol_loc) is scattered back into X.
*/
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else {                   /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift attached to the shell wrapper: only a pure transpose is supported */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
     because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* nrhsM = nrhs*M, total entries of B/X, with overflow check */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M; /* leading dimension of the (column-major) RHS array */

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B */
      /* hand the CSR of Bt (i.e. compressed columns of the RHS) to MUMPS with 1-based indices */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    /* MUMPS reads the RHS from (and writes the solution to) id.rhs, possibly in a different precision */
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      /* copy the reduced solution back into X at the Schur variable positions (1-based listvar_schur) */
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save          = mumps->id.sol_loc;
  sol_loc_len_save      = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; /* column-major position of (i,j) in centralized B */
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      /* only the host passes the (diagonal-block) CSR of Bt to MUMPS, 1-based */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      /* non-host ranks: clear any stale sparse-RHS buffers */
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owner rank of global row isol_loc[i] in X's row layout */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; /* same row in the j-th column of X */
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) { /* b_seq/scat_rhs were created only in the centralized dense-RHS path */
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

2317: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2318: {
2319:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2320:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2322:   PetscFunctionBegin;
2323:   mumps->id.ICNTL(9) = 0;
2324:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2325:   mumps->id.ICNTL(9) = value;
2326:   PetscFunctionReturn(PETSC_SUCCESS);
2327: }

2329: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2330: {
2331:   PetscBool flg;
2332:   Mat       B;

2334:   PetscFunctionBegin;
2335:   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2336:   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

2338:   /* Create B=Bt^T that uses Bt's data structure */
2339:   PetscCall(MatCreateTranspose(Bt, &B));

2341:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2342:   PetscCall(MatDestroy(&B));
2343:   PetscFunctionReturn(PETSC_SUCCESS);
2344: }

2346: #if !defined(PETSC_USE_COMPLEX)
2347: /*
2348:   input:
2349:    F:        numeric factor
2350:   output:
2351:    nneg:     total number of negative pivots
2352:    nzero:    total number of zero pivots
2353:    npos:     (global dimension of F) - nneg - nzero
2354: */
2355: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2356: {
2357:   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2358:   PetscMPIInt size;

2360:   PetscFunctionBegin;
2361:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2362:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2363:   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));

2365:   if (nneg) *nneg = mumps->id.INFOG(12);
2366:   if (nzero || npos) {
2367:     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2368:     if (nzero) *nzero = mumps->id.INFOG(28);
2369:     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2370:   }
2371:   PetscFunctionReturn(PETSC_SUCCESS);
2372: }
2373: #endif

/*
  With PETSc-OpenMP support (omp_comm_size > 1), gather the COO triples (irn/jcn/val) from
  all ranks of mumps->omp_comm onto the omp master rank, which is the one that calls MUMPS.

  reuse = MAT_INITIAL_MATRIX: gather per-rank nonzero counts, reallocate the master's bigger
                              irn/jcn/val arrays, and transfer all three arrays;
  reuse = MAT_REUSE_MATRIX:   the nonzero structure is unchanged, only val is transferred.

  Because MPI counts are 32-bit int and mumps->nnz may exceed 2^31-1, each message is sliced
  into chunks of at most PETSC_MPI_INT_MAX entries; all chunks share one tag and are completed
  by a single MPI_Waitall.
*/
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; /* ceil(recvcount[i]/PETSC_MPI_INT_MAX) slices */
      } else {
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          /* slice each incoming message into chunks of at most PETSC_MPI_INT_MAX entries */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master ranks send their triples, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: structure unchanged, only transfer the values array */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

2495: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2496: {
2497:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2499:   PetscFunctionBegin;
2500:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2501:     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2502:     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2503:     PetscFunctionReturn(PETSC_SUCCESS);
2504:   }

2506:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2507:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

2509:   /* numerical factorization phase */
2510:   mumps->id.job = JOB_FACTNUMERIC;
2511:   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2512:     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2513:   } else {
2514:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2515:   }

2517:   if (F->schur) {
2518:     const PetscScalar *array;
2519:     MUMPS_INT          size = mumps->id.size_schur;
2520:     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2521:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2522:     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2523:   }

2525:   if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix)));
2526:   if (A->rmap->N && A->cmap->N) PetscMUMPS_c(mumps);
2527:   if (mumps->id.INFOG(1) < 0) {
2528:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2529:     if (mumps->id.INFOG(1) == -10) {
2530:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2531:       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2532:     } else if (mumps->id.INFOG(1) == -13) {
2533:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2535:     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2536:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2537:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2538:     } else {
2539:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2540:       F->factorerrortype = MAT_FACTOR_OTHER;
2541:     }
2542:   }
2543:   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));

2545:   F->assembled = PETSC_TRUE;

2547:   if (F->schur) { /* reset Schur status to unfactored */
2548: #if defined(PETSC_HAVE_CUDA)
2549:     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2550: #endif
2551:     PetscScalar *array;
2552:     PetscCall(MatDenseGetArray(F->schur, &array));
2553:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2554:     PetscCall(MatDenseRestoreArray(F->schur, &array));
2555:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2556:       mumps->id.ICNTL(19) = 2;
2557:       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2558:     }
2559:     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2560:   }

2562:   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2563:   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

2565:   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2566:   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2567:   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2568:   // which the solution (in SOL_loc) is available on the local processor.
2569:   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2570:   // normally have the same contents for each of these calls. The only exception is the case of
2571:   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2572:   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2573:   if (mumps->petsc_size > 1) {
2574:     PetscInt     lsol_loc;
2575:     PetscScalar *array;

2577:     /* distributed solution; Create x_seq=sol_loc for repeated use */
2578:     if (mumps->x_seq) {
2579:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2580:       PetscCall(PetscFree(mumps->id.isol_loc));
2581:       PetscCall(VecDestroy(&mumps->x_seq));
2582:     }
2583:     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2584:     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2585:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2586:     PetscCall(VecGetArray(mumps->x_seq, &array));
2587:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2588:     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2589:     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2590:   }
2591:   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2592:   PetscFunctionReturn(PETSC_SUCCESS);
2593: }

/* Sets MUMPS options from the options database.

   Collective on the communicator of F.

   On the first call (detected via mumps->id.job == JOB_NULL) this also performs the
   one-time MUMPS instance initialization: communicator setup (optionally converting MPI
   ranks into OpenMP threads), precision selection, the JOB_INIT call into MUMPS, and the
   restoration of ICNTL/CNTL values that were cached before the instance existed.
   Subsequent calls only (re)read the -mat_mumps_* options.

   Input Parameters:
+  F - the MUMPS factor matrix (Mat_MUMPS in F->data)
-  A - the matrix being factored (supplies communicator and block sizes)
*/
static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
{
  Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
  PetscReal     cntl;
  PetscMUMPSInt icntl = 0, size, *listvar_schur;
  PetscInt      info[80], i, ninfo = 80, rbs, cbs;
  PetscBool     flg   = PETSC_FALSE;
  /* before the internal id is allocated (first call), the Schur flag lives in the cached ICNTL26 */
  PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
  void         *arr;

  PetscFunctionBegin;
  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
  if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
    PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
    PetscInt       nthreads   = 0;
    /* ICNTL_pre/CNTL_pre cache (index,value) pairs set by the user before MUMPS was initialized; slot 0 holds the count */
    PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    PetscMUMPSInt  nblk, *blkvar, *blkptr;

    mumps->petsc_comm = PetscObjectComm((PetscObject)A);
    PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
    PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

    PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
    if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
    /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
    PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
    if (mumps->use_petsc_omp_support) {
      PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
      PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
      PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
#else
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
              ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
#endif
    } else {
      /* no OpenMP conversion: MUMPS runs on the same communicator as the PETSc matrix */
      mumps->omp_comm      = PETSC_COMM_SELF;
      mumps->mumps_comm    = mumps->petsc_comm;
      mumps->is_omp_master = PETSC_TRUE;
    }
    PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
    mumps->reqs = NULL;
    mumps->tag  = 0;

    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
        /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
        MPI_Comm comm;
        PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
        mumps->mumps_comm = comm;
      } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
    }

    /* MUMPS expects a Fortran communicator handle */
    mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
    mumps->id.job          = JOB_INIT;
    mumps->id.par          = 1; /* host participates factorizaton and solve */
    mumps->id.sym          = mumps->sym;

    /* stash user-provided Schur and block data: JOB_INIT zeroes the MUMPS struct, so restore these afterwards */
    size          = mumps->id.size_schur;
    arr           = mumps->id.schur;
    listvar_schur = mumps->id.listvar_schur;
    nblk          = mumps->id.nblk;
    blkvar        = mumps->id.blkvar;
    blkptr        = mumps->id.blkptr;
    if (PetscDefined(USE_DEBUG)) {
      /* listvar_schur holds 1-based global indices; validate them against the global row count */
      for (PetscInt i = 0; i < size; i++)
        PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
                   A->rmap->N);
    }

    PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
    PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
    PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
    PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));

    /* call MUMPS with JOB_INIT to create the instance */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

    /* set PETSc-MUMPS default options - override MUMPS default */
    mumps->id.ICNTL(3) = 0;
    mumps->id.ICNTL(4) = 0;
    if (mumps->petsc_size == 1) {
      mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
      mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
    } else {
      mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
      mumps->id.ICNTL(21) = 1; /* distributed solution */
    }
    if (nblk && blkptr) {
      /* user supplied a block structure before initialization; re-install it and enable ICNTL(15) compression */
      mumps->id.ICNTL(15) = 1;
      mumps->id.nblk      = nblk;
      mumps->id.blkvar    = blkvar;
      mumps->id.blkptr    = blkptr;
    } else mumps->id.ICNTL(15) = 0;

    /* restore cached ICNTL and CNTL values */
    for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
    for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);

    PetscCall(PetscFree(mumps->ICNTL_pre));
    PetscCall(PetscFree(mumps->CNTL_pre));

    if (schur) {
      /* re-install the Schur complement data stashed before JOB_INIT */
      mumps->id.size_schur    = size;
      mumps->id.schur_lld     = size;
      mumps->id.schur         = arr;
      mumps->id.listvar_schur = listvar_schur;
      if (mumps->petsc_size > 1) {
        PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

        mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
        gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
        PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
        PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
      } else {
        if (F->factortype == MAT_FACTOR_LU) {
          mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
        } else {
          mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
        }
      }
      mumps->id.ICNTL(26) = -1;
    }

    /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
       For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
     */
    PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
    PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));

    mumps->scat_rhs = NULL;
    mumps->scat_sol = NULL;
  }
  /* from here on: plain options parsing, executed on every call */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
  if (flg) mumps->id.ICNTL(1) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
  if (flg) mumps->id.ICNTL(2) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
  if (flg) mumps->id.ICNTL(3) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
  if (flg) mumps->id.ICNTL(6) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
  if (flg) {
    /* ICNTL(7)=1 means "use user-supplied perm_in" and is set internally, never from the command line */
    PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
    mumps->id.ICNTL(7) = icntl;
  }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
  /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
  PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
  /* a uniform block size > 1 is advertised to MUMPS via a negative ICNTL(15) */
  if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
  if (flg) {
    if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
    else if (mumps->id.ICNTL(15) > 0) {
      const PetscInt *bsizes;
      PetscInt        nblocks, p, *blkptr = NULL;
      PetscMPIInt    *recvcounts, *displs, n;
      PetscMPIInt     rank, size = 0;

      /* gather the per-rank variable block sizes on rank 0 and build a 1-based blkptr prefix array for MUMPS */
      PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
      flg = PETSC_TRUE;
      for (p = 0; p < nblocks; ++p) {
        if (bsizes[p] > 1) break;
      }
      if (p == nblocks) flg = PETSC_FALSE;
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
      if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        /* size stays 0 on non-root ranks, so the Calloc and displs loop are no-ops there */
        PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
        PetscCall(PetscMPIIntCast(nblocks, &n));
        PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
        for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
        PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
        PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
        PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
        if (rank == 0) {
          blkptr[0] = 1;
          /* NOTE(review): this prefix sum runs over n (rank 0's local block count) while blkptr holds
             displs[size] gathered entries in total; confirm entries from other ranks are meant to be
             excluded from the accumulation, or whether the bound should be displs[size] */
          for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
          PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
        }
        PetscCall(PetscFree2(recvcounts, displs));
        PetscCall(PetscFree(blkptr));
      }
    }
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    PetscCall(MatDestroy(&F->schur));
    PetscCall(MatMumpsResetSchur_Private(mumps));
  }

  /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
     and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
     and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
     This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
     see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
     In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
   */
  mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
  mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
#endif
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
  PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
  PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
#endif
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg));
  if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))'
  if (mumps->id.ICNTL(22)) {
    // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by user.
    // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
    PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL));
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
  if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_40", "ICNTL(40): adaptive BLR precision feature", "None", mumps->id.ICNTL(40), &mumps->id.ICNTL(40), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_47", "ICNTL(47): single precision factorization in a double precision instance", "None", mumps->id.ICNTL(47), &mumps->id.ICNTL(47), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

  /* real-valued CNTL entries go through ID_CNTL_GET/SET so the internal (possibly mixed) precision is honored */
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 7, cntl);

  PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
  if (ninfo) {
    PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
    PetscCall(PetscMalloc1(ninfo, &mumps->info));
    mumps->ninfo = ninfo;
    for (i = 0; i < ninfo; i++) {
      /* NOTE(review): the message prints ninfo rather than the offending index info[i]; confirm intent */
      PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
      mumps->info[i] = info[i];
    }
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

2877: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2878: {
2879:   PetscFunctionBegin;
2880:   if (mumps->id.INFOG(1) < 0) {
2881:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2882:     if (mumps->id.INFOG(1) == -6) {
2883:       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2884:       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2885:     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2886:       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2887:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2888:     } else {
2889:       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2890:       F->factorerrortype = MAT_FACTOR_OTHER;
2891:     }
2892:   }
2893:   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2894:   PetscFunctionReturn(PETSC_SUCCESS);
2895: }

2897: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2898: {
2899:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2900:   Vec            b;
2901:   const PetscInt M = A->rmap->N;

2903:   PetscFunctionBegin;
2904:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2905:     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2906:     PetscFunctionReturn(PETSC_SUCCESS);
2907:   }

2909:   /* Set MUMPS options from the options database */
2910:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2912:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2913:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2915:   /* analysis phase */
2916:   mumps->id.job = JOB_FACTSYMBOLIC;
2917:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2918:   switch (mumps->id.ICNTL(18)) {
2919:   case 0: /* centralized assembled matrix input */
2920:     if (!mumps->myid) {
2921:       mumps->id.nnz = mumps->nnz;
2922:       mumps->id.irn = mumps->irn;
2923:       mumps->id.jcn = mumps->jcn;
2924:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2925:       if (r && mumps->id.ICNTL(7) == 7 && !F->schur) {
2926:         mumps->id.ICNTL(7) = 1;
2927:         if (!mumps->myid) {
2928:           const PetscInt *idx;

2930:           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2931:           PetscCall(ISGetIndices(r, &idx));
2932:           for (PetscInt i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2933:           PetscCall(ISRestoreIndices(r, &idx));
2934:         }
2935:       }
2936:     }
2937:     break;
2938:   case 3: /* distributed assembled matrix input (size>1) */
2939:     mumps->id.nnz_loc = mumps->nnz;
2940:     mumps->id.irn_loc = mumps->irn;
2941:     mumps->id.jcn_loc = mumps->jcn;
2942:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2943:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2944:       PetscCall(MatCreateVecs(A, NULL, &b));
2945:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2946:       PetscCall(VecDestroy(&b));
2947:     }
2948:     break;
2949:   }
2950:   if (A->rmap->N && A->cmap->N) {
2951:     PetscMUMPS_c(mumps);
2952:     PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2953:   }
2954:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2955:   F->ops->solve             = MatSolve_MUMPS;
2956:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2957:   F->ops->matsolve          = MatMatSolve_MUMPS;
2958:   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2959:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2961:   mumps->matstruc = SAME_NONZERO_PATTERN;
2962:   PetscFunctionReturn(PETSC_SUCCESS);
2963: }

2965: /* Note the PETSc r and c permutations are ignored */
2966: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2967: {
2968:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2969:   Vec            b;
2970:   const PetscInt M = A->rmap->N;

2972:   PetscFunctionBegin;
2973:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2974:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2975:     PetscFunctionReturn(PETSC_SUCCESS);
2976:   }

2978:   /* Set MUMPS options from the options database */
2979:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2981:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2982:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2984:   /* analysis phase */
2985:   mumps->id.job = JOB_FACTSYMBOLIC;
2986:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2987:   switch (mumps->id.ICNTL(18)) {
2988:   case 0: /* centralized assembled matrix input */
2989:     if (!mumps->myid) {
2990:       mumps->id.nnz = mumps->nnz;
2991:       mumps->id.irn = mumps->irn;
2992:       mumps->id.jcn = mumps->jcn;
2993:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2994:     }
2995:     break;
2996:   case 3: /* distributed assembled matrix input (size>1) */
2997:     mumps->id.nnz_loc = mumps->nnz;
2998:     mumps->id.irn_loc = mumps->irn;
2999:     mumps->id.jcn_loc = mumps->jcn;
3000:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3001:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3002:       PetscCall(MatCreateVecs(A, NULL, &b));
3003:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3004:       PetscCall(VecDestroy(&b));
3005:     }
3006:     break;
3007:   }
3008:   if (A->rmap->N && A->cmap->N) {
3009:     PetscMUMPS_c(mumps);
3010:     PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3011:   }
3012:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3013:   F->ops->solve             = MatSolve_MUMPS;
3014:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3015:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

3017:   mumps->matstruc = SAME_NONZERO_PATTERN;
3018:   PetscFunctionReturn(PETSC_SUCCESS);
3019: }

/* Symbolic Cholesky factorization (MUMPS analysis phase).

   Converts A to triplet form, hands MUMPS the centralized or distributed structure
   depending on ICNTL(18), runs JOB_FACTSYMBOLIC, and installs the numeric/solve entry
   points on F. Note the PETSc r permutation and factor info are ignored. */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* numerical values are only needed by analysis when ICNTL(6) requests value-based permutation/scaling */
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  /* 0-sized matrices bypass MUMPS altogether */
  if (A->rmap->N && A->cmap->N) {
    PetscMUMPS_c(mumps);
    PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
  }
  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* for a symmetric factor, solve and solve-transpose coincide */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Print the MUMPS control parameters (ICNTL/CNTL) and statistics (INFO/INFOG/RINFO/RINFOG)
   of a MUMPS factor to an ASCII viewer. Per-process data (RINFO/INFO) is printed only with
   the PETSC_VIEWER_ASCII_INFO_DETAIL format; host-only data (INFOG/RINFOG) is printed on rank 0. */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type; silently do nothing for other factor types */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* control parameters, as described in the MUMPS users' guide */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
        /* RINFOG(4)-RINFOG(11) are only meaningful when error analysis was requested */
        if (mumps->id.ICNTL(11) > 0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(15-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory allocated locally):       %d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(40) (adaptive BLR precision feature):             %d\n", mumps->id.ICNTL(40)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(47) (single precision factorization in a double precision instance): %d\n", mumps->id.ICNTL(47)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor; synchronized printing so ranks appear in order */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* user-requested extra INFO entries (set via options); 80 is the size of the MUMPS INFO array */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          for (PetscInt i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

3240: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3241: {
3242:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

3244:   PetscFunctionBegin;
3245:   info->block_size        = 1.0;
3246:   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3247:   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3248:   info->nz_unneeded       = 0.0;
3249:   info->assemblies        = 0.0;
3250:   info->mallocs           = 0.0;
3251:   info->memory            = 0.0;
3252:   info->fill_ratio_given  = 0;
3253:   info->fill_ratio_needed = 0;
3254:   info->factor_mallocs    = 0;
3255:   PetscFunctionReturn(PETSC_SUCCESS);
3256: }

3258: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3259: {
3260:   Mat_MUMPS      *mumps = (Mat_MUMPS *)F->data;
3261:   const PetscInt *idxs;
3262:   PetscInt        size, i;

3264:   PetscFunctionBegin;
3265:   PetscCall(ISGetLocalSize(is, &size));
3266:   /* Schur complement matrix */
3267:   PetscCall(MatDestroy(&F->schur));
3268:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3269:   // don't allocate mumps->id.schur[] now as its precision is yet to know
3270:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3271:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3272:   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

3274:   /* MUMPS expects Fortran style indices */
3275:   PetscCall(PetscFree(mumps->id.listvar_schur));
3276:   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3277:   PetscCall(ISGetIndices(is, &idxs));
3278:   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3279:   PetscCall(ISRestoreIndices(is, &idxs));
3280:   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
3281:   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3282:   else mumps->ICNTL26 = -1;
3283:   PetscFunctionReturn(PETSC_SUCCESS);
3284: }

/* Copy the Schur complement computed by MUMPS (mumps->id.schur) into a newly created
   sequential dense (column-major) matrix *S, converting from the MUMPS storage layout
   selected by ICNTL(19) and from the MUMPS working precision. Caller owns *S. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      /* transpose element-by-element into the column-major array */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns */
      /* same layout as PETSc dense; just convert precision in bulk */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      /* mirror the lower triangle into the upper one to produce a full symmetric matrix */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the MUMPS integer control parameter ICNTL(icntl) = ival.
   If the MUMPS instance has not been initialized yet (id.job == JOB_NULL), the pair is
   cached and replayed when PetscMUMPS_c() is first called.
   Cache layout of ICNTL_pre: [0] = number of cached pairs, then (icntl, ival) pairs. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 40 || icntl == 47 || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      /* grow the cache by one pair: 2*nICNTL_pre + 3 = count slot + existing pairs + new pair */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    /* store (or overwrite) the pair at slot i */
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

3347: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3348: {
3349:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3351:   PetscFunctionBegin;
3352:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 40 || icntl == 47 || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3353:   if (mumps->id.job == JOB_NULL) {
3354:     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3355:     *ival = 0;
3356:     for (i = 0; i < nICNTL_pre; ++i) {
3357:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3358:     }
3359:   } else *ival = mumps->id.ICNTL(icntl);
3360:   PetscFunctionReturn(PETSC_SUCCESS);
3361: }

/* Set the MUMPS real control parameter CNTL(icntl) = val.
   As with MatMumpsSetIcntl_MUMPS(), the pair is cached until MUMPS is initialized;
   note both the index and the value are stored as PetscReal in CNTL_pre.
   Cache layout of CNTL_pre: [0] = number of cached pairs, then (icntl, val) pairs. */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; /* number of already cached CNTL */
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* is this CNTL already cached? */
    if (i == nCNTL_pre) {
      /* grow the cache by one pair: 2*nCNTL_pre + 3 = count slot + existing pairs + new pair */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    /* store (or overwrite) the pair at slot i */
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else ID_CNTL_SET(mumps->id, icntl, val);
  PetscFunctionReturn(PETSC_SUCCESS);
}

3384: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3385: {
3386:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3388:   PetscFunctionBegin;
3389:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3390:   if (mumps->id.job == JOB_NULL) {
3391:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3392:     *val = 0.0;
3393:     for (i = 0; i < nCNTL_pre; ++i) {
3394:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3395:     }
3396:   } else *val = ID_CNTL_GET(mumps->id, icntl);
3397:   PetscFunctionReturn(PETSC_SUCCESS);
3398: }

/*@C
  MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F      - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
- tmpdir - temporary directory for out-of-core facility.

  Level: beginner

  Note:
  To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`.
  If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. But if neither was defined, it will use /tmp by default.

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  /* sizeof on the NULL-pointer member is a compile-time bound of the fixed-size
     ooc_tmpdir field (not evaluated at run time); PetscStrncpy NUL-terminates */
  PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.

  Output Parameter:
. tmpdir - temporary directory for out-of-core facility.

  Level: beginner

  Note:
  The returned string is read-only and user should not try to change it.

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  /* returns a pointer into the internal buffer; ownership stays with the factor */
  if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir;
  PetscFunctionReturn(PETSC_SUCCESS);
}

3457: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3458: {
3459:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3461:   PetscFunctionBegin;
3462:   *info = mumps->id.INFO(icntl);
3463:   PetscFunctionReturn(PETSC_SUCCESS);
3464: }

3466: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3467: {
3468:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3470:   PetscFunctionBegin;
3471:   *infog = mumps->id.INFOG(icntl);
3472:   PetscFunctionReturn(PETSC_SUCCESS);
3473: }

3475: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3476: {
3477:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3479:   PetscFunctionBegin;
3480:   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3481:   PetscFunctionReturn(PETSC_SUCCESS);
3482: }

3484: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3485: {
3486:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3488:   PetscFunctionBegin;
3489:   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3490:   PetscFunctionReturn(PETSC_SUCCESS);
3491: }

3493: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3494: {
3495:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3497:   PetscFunctionBegin;
3498:   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3499:   *size  = 0;
3500:   *array = NULL;
3501:   if (!mumps->myid) {
3502:     *size = mumps->id.INFOG(28);
3503:     PetscCall(PetscMalloc1(*size, array));
3504:     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3505:   }
3506:   PetscFunctionReturn(PETSC_SUCCESS);
3507: }

/* Compute selected entries of inv(A) using the MUMPS A^-1 feature (ICNTL(30)).
   spRHS must be a MATTRANSPOSEVIRTUAL wrapper; the wrapped AIJ matrix supplies the
   sparsity pattern of the requested entries (as a sparse RHS on rank 0) and receives
   the computed values in place. */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
  /* reject shell scalings/shifts on the wrapper: MUMPS sees only the raw wrapped matrix */
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  /* ICNTL(30) = 1: compute user-specified entries of the inverse */
  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    /* in parallel, only the diagonal (local) block of the MPIAIJ matrix is used on rank 0 */
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq         = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL;

  if (!mumps->myid) {
    /* rank 0 hands the CSR pattern of the requested entries to MUMPS as a sparse RHS */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
  } else {
    /* non-root ranks provide no sparse RHS; release any buffer left from a previous call */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    if (mumps->id.rhs_sparse_len) {
      PetscCall(PetscFree(mumps->id.rhs_sparse));
      mumps->id.rhs_sparse_len = 0;
    }
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variant of MatMumpsGetInverse_MUMPS() taking the sparse pattern matrix spRHST directly:
   it wraps spRHST in a temporary MATTRANSPOSEVIRTUAL matrix, delegates to
   MatMumpsGetInverse_MUMPS(), and destroys the wrapper afterwards. */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}

3580: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3581: {
3582:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3584:   PetscFunctionBegin;
3585:   if (nblk) {
3586:     PetscAssertPointer(blkptr, 4);
3587:     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3588:     PetscCall(PetscFree(mumps->id.blkptr));
3589:     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3590:     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3591:     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3592:     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3593:     // mumps->id.ICNTL(15) = 1;
3594:     if (blkvar) {
3595:       PetscCall(PetscFree(mumps->id.blkvar));
3596:       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3597:       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3598:     }
3599:   } else {
3600:     PetscCall(PetscFree(mumps->id.blkptr));
3601:     PetscCall(PetscFree(mumps->id.blkvar));
3602:     // mumps->id.ICNTL(15) = 0;
3603:     mumps->id.nblk = 0;
3604:   }
3605:   PetscFunctionReturn(PETSC_SUCCESS);
3606: }

3608: /*MC
3609:   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3610:   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

3612:   Works with `MATAIJ` and `MATSBAIJ` matrices

3614:   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

3616:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3617:   See details below.

3619:   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

3621:   Options Database Keys:
3622: +  -mat_mumps_icntl_1           - ICNTL(1): output stream for error messages
3623: .  -mat_mumps_icntl_2           - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3624: .  -mat_mumps_icntl_3           - ICNTL(3): output stream for global information, collected on the host
3625: .  -mat_mumps_icntl_4           - ICNTL(4): level of printing (0 to 4)
3626: .  -mat_mumps_icntl_6           - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3627: .  -mat_mumps_icntl_7           - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3628:                                   Use -pc_factor_mat_ordering_type type to have PETSc perform the ordering (sequential only)
3629: .  -mat_mumps_icntl_8           - ICNTL(8): scaling strategy (-2 to 8 or 77)
3630: .  -mat_mumps_icntl_10          - ICNTL(10): max num of refinements
3631: .  -mat_mumps_icntl_11          - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3632: .  -mat_mumps_icntl_12          - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3633: .  -mat_mumps_icntl_13          - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3634: .  -mat_mumps_icntl_14          - ICNTL(14): percentage increase in the estimated working space
3635: .  -mat_mumps_icntl_15          - ICNTL(15): compression of the input matrix resulting from a block format
3636: .  -mat_mumps_icntl_19          - ICNTL(19): computes the Schur complement
3637: .  -mat_mumps_icntl_20          - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3638: .  -mat_mumps_icntl_22          - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23          - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
3640: .  -mat_mumps_icntl_24          - ICNTL(24): detection of null pivot rows (0 or 1)
3641: .  -mat_mumps_icntl_25          - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3642: .  -mat_mumps_icntl_26          - ICNTL(26): drives the solution phase if a Schur complement matrix
3643: .  -mat_mumps_icntl_28          - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3644: .  -mat_mumps_icntl_29          - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3645: .  -mat_mumps_icntl_30          - ICNTL(30): compute user-specified set of entries in inv(A)
3646: .  -mat_mumps_icntl_31          - ICNTL(31): indicates which factors may be discarded during factorization
3647: .  -mat_mumps_icntl_33          - ICNTL(33): compute determinant
3648: .  -mat_mumps_icntl_35          - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3649: .  -mat_mumps_icntl_36          - ICNTL(36): controls the choice of BLR factorization variant
3650: .  -mat_mumps_icntl_37          - ICNTL(37): compression of the contribution blocks (CB)
3651: .  -mat_mumps_icntl_38          - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3652: .  -mat_mumps_icntl_40          - ICNTL(40): adaptive BLR precision feature
3653: .  -mat_mumps_icntl_47          - ICNTL(47): single precision factorization in a double precision instance
3654: .  -mat_mumps_icntl_48          - ICNTL(48): multithreading with tree parallelism
3655: .  -mat_mumps_icntl_49          - ICNTL(49): compact workarray at the end of factorization phase
3656: .  -mat_mumps_icntl_58          - ICNTL(58): options for symbolic factorization
3657: .  -mat_mumps_cntl_1            - CNTL(1): relative pivoting threshold
3658: .  -mat_mumps_cntl_2            - CNTL(2): stopping criterion of refinement
3659: .  -mat_mumps_cntl_3            - CNTL(3): absolute pivoting threshold
3660: .  -mat_mumps_cntl_4            - CNTL(4): value for static pivoting
3661: .  -mat_mumps_cntl_5            - CNTL(5): fixation for null pivots
3662: .  -mat_mumps_cntl_7            - CNTL(7): precision of the dropping parameter used during BLR factorization
3663: -  -mat_mumps_use_omp_threads m - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3664:                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

3666:   Level: beginner

3668:   Notes:
3669:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3670:   error if the matrix is Hermitian.

3672:   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3673:   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

3675:   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3676:   the failure with
3677: .vb
3678:           KSPGetPC(ksp,&pc);
3679:           PCFactorGetMatrix(pc,&mat);
3680:           MatMumpsGetInfo(mat,....);
3681:           MatMumpsGetInfog(mat,....); etc.
3682: .ve
3683:   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

3685:   MUMPS provides 64-bit integer support in two build modes:
3686:   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3687:   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

3689:   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3690:   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3691:   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3692:   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

3696:   Two modes to run MUMPS/PETSc with OpenMP
3697: .vb
3698:    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3699:    threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpiexec -n 4 ./test".
3700: .ve

3702: .vb
3703:    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3704:    if a compute node has 32 cores and you run on two nodes, you may use "mpiexec -n 64 ./test -mat_mumps_use_omp_threads 16"
3705: .ve

3707:    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3708:    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3709:    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3710:    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

3713:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3714:    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3715:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3717:    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3718:    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3719:    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3720:    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3721:    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3722:    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3723:    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3724:    examine the mapping result.

3726:    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3727:    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3728:    calls `omp_set_num_threads`(m) internally before calling MUMPS.

3730:    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

3732: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3733: M*/

/* Return the solver-package name (MATSOLVERMUMPS) of a MUMPS factor matrix; A is unused */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI AIJ matrices (also handles MATDIAGONAL and dense A).
   Creates an empty factor matrix B of type MATSOLVERMUMPS, wires up the MUMPS
   view/factor/solve callbacks and the MatMumps* query functions, selects the
   triples-conversion routine matching A's type and the requested factor type,
   and sets the MUMPS symmetry flag.  ftype must be MAT_FACTOR_LU or
   MAT_FACTOR_CHOLESKY; the returned *F is NULL only for the unsupported
   complex-Hermitian Cholesky case. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* complex Hermitian (non-symmetric) Cholesky is not supported by MUMPS; signal by returning *F = NULL */
  if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* expose the MUMPS-specific query/control API on the factor object */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
    /* MUMPS SYM=1 (positive definite) only for real SPD matrices; otherwise SYM=2 (general symmetric) */
    if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
    else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

3827: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3828: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3829: {
3830:   Mat         B;
3831:   Mat_MUMPS  *mumps;
3832:   PetscBool   isSeqSBAIJ;
3833:   PetscMPIInt size;

3835:   PetscFunctionBegin;
3836:   if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3837:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3838:     *F = NULL;
3839:     PetscFunctionReturn(PETSC_SUCCESS);
3840:   }
3841:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3842:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3843:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3844:   PetscCall(MatSetUp(B));

3846:   PetscCall(PetscNew(&mumps));
3847:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3848:   if (isSeqSBAIJ) {
3849:     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3850:   } else {
3851:     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3852:   }

3854:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3855:   B->ops->view                   = MatView_MUMPS;
3856:   B->ops->getinfo                = MatGetInfo_MUMPS;

3858:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3859:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3860:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3861:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3862:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3863:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3864:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3865:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3866:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3867:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3868:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3869:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3870:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3871:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3872:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

3874:   B->factortype = MAT_FACTOR_CHOLESKY;
3875:   if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
3876:   else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3877:   else mumps->sym = 2;

3879:   /* set solvertype */
3880:   PetscCall(PetscFree(B->solvertype));
3881:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3882:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3883:   if (size == 1) {
3884:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3885:     B->canuseordering = PETSC_TRUE;
3886:   }
3887:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3888:   B->ops->destroy = MatDestroy_MUMPS;
3889:   B->data         = (void *)mumps;

3891:   *F               = B;
3892:   mumps->id.job    = JOB_NULL;
3893:   mumps->ICNTL_pre = NULL;
3894:   mumps->CNTL_pre  = NULL;
3895:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3896:   PetscFunctionReturn(PETSC_SUCCESS);
3897: }

3899: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3900: {
3901:   Mat         B;
3902:   Mat_MUMPS  *mumps;
3903:   PetscBool   isSeqBAIJ;
3904:   PetscMPIInt size;

3906:   PetscFunctionBegin;
3907:   /* Create the factorization matrix */
3908:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3909:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3910:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3911:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3912:   PetscCall(MatSetUp(B));

3914:   PetscCall(PetscNew(&mumps));
3915:   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3916:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3917:   B->factortype            = MAT_FACTOR_LU;
3918:   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3919:   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3920:   mumps->sym = 0;
3921:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

3923:   B->ops->view    = MatView_MUMPS;
3924:   B->ops->getinfo = MatGetInfo_MUMPS;

3926:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3927:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3928:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3929:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3930:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3931:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3932:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3933:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3934:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3935:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3936:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3937:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3938:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3939:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3940:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

3942:   /* set solvertype */
3943:   PetscCall(PetscFree(B->solvertype));
3944:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3945:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3946:   if (size == 1) {
3947:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3948:     B->canuseordering = PETSC_TRUE;
3949:   }
3950:   B->ops->destroy = MatDestroy_MUMPS;
3951:   B->data         = (void *)mumps;

3953:   *F               = B;
3954:   mumps->id.job    = JOB_NULL;
3955:   mumps->ICNTL_pre = NULL;
3956:   mumps->CNTL_pre  = NULL;
3957:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3958:   PetscFunctionReturn(PETSC_SUCCESS);
3959: }

3961: /* MatGetFactor for Seq and MPI SELL matrices */
3962: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3963: {
3964:   Mat         B;
3965:   Mat_MUMPS  *mumps;
3966:   PetscBool   isSeqSELL;
3967:   PetscMPIInt size;

3969:   PetscFunctionBegin;
3970:   /* Create the factorization matrix */
3971:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3972:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3973:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3974:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3975:   PetscCall(MatSetUp(B));

3977:   PetscCall(PetscNew(&mumps));

3979:   B->ops->view    = MatView_MUMPS;
3980:   B->ops->getinfo = MatGetInfo_MUMPS;

3982:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3983:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3984:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3985:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3986:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3987:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3988:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3989:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3990:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3991:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3992:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3993:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

3995:   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3996:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3997:   B->factortype            = MAT_FACTOR_LU;
3998:   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3999:   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4000:   mumps->sym              = 0;
4001:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4003:   /* set solvertype */
4004:   PetscCall(PetscFree(B->solvertype));
4005:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4006:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4007:   if (size == 1) {
4008:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4009:     B->canuseordering = PETSC_TRUE;
4010:   }
4011:   B->ops->destroy = MatDestroy_MUMPS;
4012:   B->data         = (void *)mumps;

4014:   *F               = B;
4015:   mumps->id.job    = JOB_NULL;
4016:   mumps->ICNTL_pre = NULL;
4017:   mumps->CNTL_pre  = NULL;
4018:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4019:   PetscFunctionReturn(PETSC_SUCCESS);
4020: }

/* MatGetFactor for MATNEST matrices

   Creates a MUMPS factorization matrix F for a MATNEST input A, or returns *F = NULL
   (without raising an error) when the nest layout or one of its blocks is unsupported,
   so the caller can fall back to a different solver.

   Input Parameters:
.  A     - the MATNEST matrix to be factored
.  ftype - MAT_FACTOR_LU or MAT_FACTOR_CHOLESKY

   Output Parameter:
.  F - the factor matrix, or NULL when this configuration is not supported
*/
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
  /* Complex Hermitian (but not symmetric) matrices cannot be handled by this Cholesky path;
     direct the user to LU instead (see also the sym settings below, which only cover the
     symmetric cases) */
  if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    /* Cholesky needs a structurally symmetric nest: square block layout ... */
    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    /* ... with identical (same order, not merely same set) row and column index sets per block row */
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* Reject row maps whose [min,max] index ranges intersect across block rows: an endpoint of
       one range falling inside another range disqualifies Cholesky */
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Verify every (relevant) sub-block has a type the triples-conversion routine can handle;
     for Cholesky only the upper-triangular part (c >= r) is inspected */
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      /* NOTE(review): presumably unwraps a transpose-virtual matrix so the type checks below
         see the underlying matrix; semantics of the NULL arguments not visible here — confirm */
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          /* SBAIJ blocks are accepted only on the diagonal */
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific query/control interface on the factor matrix */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  /* MUMPS sym parameter: 0 = unsymmetric (LU), 1 = symmetric positive definite,
     2 = general symmetric (also used in the complex case, where SPD is not assumed) */
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
    else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

4161: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4162: {
4163:   PetscFunctionBegin;
4164:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4165:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4166:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4167:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4168:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4169:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4170:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4171:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4172:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4173:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4174:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4175:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4176:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4177:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4178:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4179:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4180:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4181:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4182:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4183:   PetscFunctionReturn(PETSC_SUCCESS);
4184: }