Actual source code: mumps.c

  1: /*
  2:     Provides an interface to the MUMPS sparse solver
  3: */
  4: #include <petscpkg_version.h>
  5: #include <petscsf.h>
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
  8: #include <../src/mat/impls/sell/mpi/mpisell.h>
  9: #include <petsc/private/vecimpl.h>

 11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

 13: EXTERN_C_BEGIN
 14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
 15:   #include <cmumps_c.h>
 16:   #include <zmumps_c.h>
 17:   #include <smumps_c.h>
 18:   #include <dmumps_c.h>
 19: #else
 20:   #if defined(PETSC_USE_COMPLEX)
 21:     #if defined(PETSC_USE_REAL_SINGLE)
 22:       #include <cmumps_c.h>
 23:       #define MUMPS_c       cmumps_c
 24:       #define MUMPS_STRUC_C CMUMPS_STRUC_C
 25:       #define MumpsScalar   CMUMPS_COMPLEX
 26:     #else
 27:       #include <zmumps_c.h>
 28:       #define MUMPS_c       zmumps_c
 29:       #define MUMPS_STRUC_C ZMUMPS_STRUC_C
 30:       #define MumpsScalar   ZMUMPS_COMPLEX
 31:     #endif
 32:   #else
 33:     #if defined(PETSC_USE_REAL_SINGLE)
 34:       #include <smumps_c.h>
 35:       #define MUMPS_c       smumps_c
 36:       #define MUMPS_STRUC_C SMUMPS_STRUC_C
 37:       #define MumpsScalar   SMUMPS_REAL
 38:     #else
 39:       #include <dmumps_c.h>
 40:       #define MUMPS_c       dmumps_c
 41:       #define MUMPS_STRUC_C DMUMPS_STRUC_C
 42:       #define MumpsScalar   DMUMPS_REAL
 43:     #endif
 44:   #endif
 45: #endif
 46: EXTERN_C_END

 48: #define JOB_INIT         -1
 49: #define JOB_NULL         0
 50: #define JOB_FACTSYMBOLIC 1
 51: #define JOB_FACTNUMERIC  2
 52: #define JOB_SOLVE        3
 53: #define JOB_END          -2

 55: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
 56:    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
 57:    naming convention in PetscMPIInt, PetscBLASInt etc.
 58: */
 59: typedef MUMPS_INT PetscMUMPSInt;

 61: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
 62:   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
 63:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 64:   #endif
 65: #else
 66:   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one uses to build MUMPS in full 64-bit mode */
 67:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 68:   #endif
 69: #endif

 71: #define MPIU_MUMPSINT       MPI_INT
 72: #define PETSC_MUMPS_INT_MAX 2147483647
 73: #define PETSC_MUMPS_INT_MIN -2147483648

 75: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
 76: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
 77: {
 78:   PetscFunctionBegin;
 79: #if PetscDefined(USE_64BIT_INDICES)
 80:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
 81: #endif
 82:   *b = (PetscMUMPSInt)a;
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }
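// Illustrative sketch (not part of the upstream mumps.c): the typical use of PetscMUMPSIntCast() in this
// file is packing (possibly 64-bit) PetscInt indices into the 32-bit MUMPS_INT triple arrays handed to
// MUMPS, converting to 1-based (Fortran-style) indexing along the way, e.g.
//
//   PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); // asserts (in debug mode) that the value fits in MUMPS_INT
//   PetscCall(PetscMUMPSIntCast(j + shift, &col[k]));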

 86: /* Put these utility routines here since they are only used in this file */
 87: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
 88: {
 89:   PetscInt  myval;
 90:   PetscBool myset;

 92:   PetscFunctionBegin;
 93:   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
 94:   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
 95:   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
 96:   if (set) *set = myset;
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }
 99: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
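// Illustrative sketch (not part of the upstream mumps.c): PetscOptionsMUMPSInt() is meant to be used
// inside a PetscOptionsBegin()/PetscOptionsEnd() block, where a PetscOptionsObject is in scope; the
// option name and the variable icntl below are hypothetical.
//
//   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_example", "An ICNTL entry", "None", icntl, &icntl, NULL));
//   PetscOptionsEnd();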

101: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
102: //
103: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
104: // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
105: // arrays within the types are directly linked. In the end, we only need to copy ~20 integers/pointers, which is cheap. See PreMumpsCall()/PostMumpsCall().
106: //
107: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
108: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
109: // Also note that we added some *_len fields, not present in the specific types, to track the sizes of those MumpsScalar buffers.
110: typedef struct {
111:   PetscPrecision precision;   // precision used by MUMPS
112:   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

114:   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
115:   MUMPS_INT  sym, par, job;
116:   MUMPS_INT  comm_fortran; /* Fortran communicator */
117:   MUMPS_INT *icntl;
118:   void      *cntl; // MumpsReal, fixed size array
119:   MUMPS_INT  n;
120:   MUMPS_INT  nblk;

122:   /* Assembled entry */
123:   MUMPS_INT8 nnz;
124:   MUMPS_INT *irn;
125:   MUMPS_INT *jcn;
126:   void      *a; // MumpsScalar, centralized input
127:   PetscCount a_len;

129:   /* Distributed entry */
130:   MUMPS_INT8 nnz_loc;
131:   MUMPS_INT *irn_loc;
132:   MUMPS_INT *jcn_loc;
133:   void      *a_loc; // MumpsScalar, distributed input
134:   PetscCount a_loc_len;

136:   /* Matrix by blocks */
137:   MUMPS_INT *blkptr;
138:   MUMPS_INT *blkvar;

140:   /* Ordering, if given by user */
141:   MUMPS_INT *perm_in;

143:   /* RHS, solution, output data and statistics */
144:   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
145:   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

147:   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
148:   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
149:   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
150:   MUMPS_INT  schur_lld;
151:   MUMPS_INT *info, *infog;   // fixed size array
152:   void      *rinfo, *rinfog; // MumpsReal, fixed size array

154:   /* Null space */
155:   MUMPS_INT *pivnul_list; // allocated by MUMPS!
156:   MUMPS_INT *mapping;     // allocated by MUMPS!

158:   /* Schur */
159:   MUMPS_INT  size_schur;
160:   MUMPS_INT *listvar_schur;
161:   void      *schur; // MumpsScalar
162:   PetscCount schur_len;

164:   /* For out-of-core */
165:   char *ooc_tmpdir; // fixed size array
166:   char *ooc_prefix; // fixed size array
167: } XMUMPS_STRUC_C;

169: // Note: the fixed-size arrays are part of the inner MUMPS struct; redirect the outer struct's pointers to them
170: #define AllocatInternalID(MUMPS_STRUC_T, outer) \
171:   do { \
172:     MUMPS_STRUC_T *inner; \
173:     PetscCall(PetscNew(&inner)); \
174:     outer->icntl      = inner->icntl; \
175:     outer->cntl       = inner->cntl; \
176:     outer->info       = inner->info; \
177:     outer->infog      = inner->infog; \
178:     outer->rinfo      = inner->rinfo; \
179:     outer->rinfog     = inner->rinfog; \
180:     outer->ooc_tmpdir = inner->ooc_tmpdir; \
181:     outer->ooc_prefix = inner->ooc_prefix; \
182:     /* these three fields should never change after init */ \
183:     inner->comm_fortran = outer->comm_fortran; \
184:     inner->par          = outer->par; \
185:     inner->sym          = outer->sym; \
186:     outer->internal_id  = inner; \
187:   } while (0)

189: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
190: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
191: {
192:   PetscFunctionBegin;
193:   outer->precision = precision;
194: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
195:   #if defined(PETSC_USE_COMPLEX)
196:   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
197:   else AllocatInternalID(ZMUMPS_STRUC_C, outer);
198:   #else
199:   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
200:   else AllocatInternalID(DMUMPS_STRUC_C, outer);
201:   #endif
202: #else
203:   AllocatInternalID(MUMPS_STRUC_C, outer);
204: #endif
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
209:   do { \
210:     MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
211:     PetscCall(PetscFree(inner->a)); \
212:     PetscCall(PetscFree(inner->a_loc)); \
213:     PetscCall(PetscFree(inner->redrhs)); \
214:     PetscCall(PetscFree(inner->rhs)); \
215:     PetscCall(PetscFree(inner->rhs_sparse)); \
216:     PetscCall(PetscFree(inner->rhs_loc)); \
217:     PetscCall(PetscFree(inner->sol_loc)); \
218:     PetscCall(PetscFree(inner->schur)); \
219:   } while (0)

221: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
222: {
223:   PetscFunctionBegin;
224:   if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
225: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
226:     const PetscPrecision mumps_precision = outer->precision;
227:     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
228:   #if defined(PETSC_USE_COMPLEX)
229:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
230:       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
231:   #else
232:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
233:       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
234:   #endif
235:     }
236: #endif
237:     PetscCall(PetscFree(outer->internal_id));
238:   }
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }
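// Illustrative sketch (not part of the upstream mumps.c): typical lifecycle of the outer/inner pair.
// MatMumpsAllocateInternalID() picks one of {S,D,C,Z}MUMPS_STRUC_C based on the requested precision and
// PETSc's use of complex; afterwards the outer's icntl/cntl/info/... pointers alias the fixed-size arrays
// inside the inner struct, so shared code can read/write them without knowing the concrete type.
// The values and the communicator <comm> below are placeholders.
//
//   XMUMPS_STRUC_C id = {0};
//   id.comm_fortran   = MPI_Comm_c2f(comm); // sym/par/comm_fortran must be set before allocation
//   id.par            = 1;
//   id.sym            = 0;
//   PetscCall(MatMumpsAllocateInternalID(&id, PETSC_PRECISION_DOUBLE));
//   id.job = JOB_INIT;
//   PetscCall(PetscCallMumps_Private(&id)); // defined further below
//   ...
//   id.job = JOB_END;
//   PetscCall(PetscCallMumps_Private(&id));
//   PetscCall(MatMumpsFreeInternalID(&id));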

242: // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return its address at <ma>.
243: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
244: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalars, though in code they should be <*ma> and <*m>)
245: // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
246: //
247: // The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same.
248: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
249: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
250: //
251: //
252: //   Input parameters:
253: // + convert   - whether to do PetscScalar to MumpsScalar conversion
254: // . n         - length of the PetscScalar array
255: // . pa        - [n], points to the PetscScalar array
256: // . precision - precision of MumpsScalar
257: // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
258: // - ma        - on input, an existing MumpsScalar array if any.
259: //
260: //   Output parameters:
261: // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
262: // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
263: //
264: //   Note:
265: //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
266: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
267: {
268:   PetscFunctionBegin;
269: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
270:   const PetscPrecision mumps_precision = precision;
271:   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
272:   #if defined(PETSC_USE_COMPLEX)
273:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
274:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
275:       if (*m < n) {
276:         PetscCall(PetscFree(*ma));
277:         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
278:         *m = n;
279:       }
280:       if (convert) {
281:         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
282:         for (PetscCount i = 0; i < n; i++) {
283:           b[i].r = PetscRealPart(pa[i]);
284:           b[i].i = PetscImaginaryPart(pa[i]);
285:         }
286:       }
287:     } else {
288:       if (*m < n) {
289:         PetscCall(PetscFree(*ma));
290:         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
291:         *m = n;
292:       }
293:       if (convert) {
294:         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
295:         for (PetscCount i = 0; i < n; i++) {
296:           b[i].r = PetscRealPart(pa[i]);
297:           b[i].i = PetscImaginaryPart(pa[i]);
298:         }
299:       }
300:     }
301:   }
302:   #else
303:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
304:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
305:       if (*m < n) {
306:         PetscCall(PetscFree(*ma));
307:         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
308:         *m = n;
309:       }
310:       if (convert) {
311:         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
312:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
313:       }
314:     } else {
315:       if (*m < n) {
316:         PetscCall(PetscFree(*ma));
317:         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
318:         *m = n;
319:       }
320:       if (convert) {
321:         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
322:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
323:       }
324:     }
325:   }
326:   #endif
327:   else
328: #endif
329:   {
330:     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
331:     *ma = (void *)pa;                       // same precision, make them alias
332:     *m  = 0;
333:   }
334:   PetscFunctionReturn(PETSC_SUCCESS);
335: }
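// Illustrative sketch (not part of the upstream mumps.c): making a MumpsScalar companion for a
// PetscScalar buffer, following the contract documented above (variable names hypothetical).
//
//   PetscScalar *work;               // PetscScalar buffer owned by the PETSc side
//   void        *mwork     = NULL;   // MumpsScalar companion handed to MUMPS
//   PetscCount   mwork_len = 0;      // 0 on input: no pre-existing MumpsScalar buffer
//   PetscCall(PetscMalloc1(n, &work));
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, work, id.precision, &mwork_len, &mwork));
//   // If MumpsScalar == PetscScalar: mwork aliases work and mwork_len stays 0 (nothing extra to free).
//   // Otherwise: mwork is a freshly allocated, converted copy of length n and mwork_len == n.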

337: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
338: //
339: // 1) If the two types are different, cast array elements.
340: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
341: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
342: {
343:   PetscFunctionBegin;
344: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
345:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
346:   #if defined(PETSC_USE_COMPLEX)
347:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
348:       PetscReal         *a = (PetscReal *)pa;
349:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
350:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
351:     } else {
352:       PetscReal         *a = (PetscReal *)pa;
353:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
354:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
355:     }
356:   #else
357:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
358:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
360:     } else {
361:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
362:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
363:     }
364:   #endif
365:   } else
366: #endif
367:     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }

371: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
372: //
373: // 1) If the two types are different, cast array elements.
374: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
375: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
376: {
377:   PetscFunctionBegin;
378: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
379:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
380:   #if defined(PETSC_USE_COMPLEX)
381:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
382:       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
383:       for (PetscCount i = 0; i < n; i++) {
384:         b[i].r = PetscRealPart(pa[i]);
385:         b[i].i = PetscImaginaryPart(pa[i]);
386:       }
387:     } else {
388:       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
389:       for (PetscCount i = 0; i < n; i++) {
390:         b[i].r = PetscRealPart(pa[i]);
391:         b[i].i = PetscImaginaryPart(pa[i]);
392:       }
393:     }
394:   #else
395:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
396:       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
397:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
398:     } else {
399:       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
400:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
401:     }
402:   #endif
403:   } else
404: #endif
405:     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
406:   PetscFunctionReturn(PETSC_SUCCESS);
407: }
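// Illustrative sketch (not part of the upstream mumps.c): the two cast helpers above form a round trip
// around a MUMPS call when MumpsScalar and PetscScalar differ in precision (variable names hypothetical).
//
//   PetscCall(MatMumpsCastPetscScalarArray(n, petsc_buf, id.precision, mumps_buf)); // PetscScalar -> MumpsScalar
//   ... call MUMPS with mumps_buf ...
//   PetscCall(MatMumpsCastMumpsScalarArray(n, id.precision, mumps_buf, petsc_buf)); // MumpsScalar -> PetscScalar
//
// When the precisions match, both calls degenerate to PetscArraycpy(), and are no-ops if the two
// addresses coincide.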

409: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
410: {
411:   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
412: }

414: #define PreMumpsCall(inner, outer, mumpsscalar) \
415:   do { \
416:     inner->job           = outer->job; \
417:     inner->n             = outer->n; \
418:     inner->nblk          = outer->nblk; \
419:     inner->nnz           = outer->nnz; \
420:     inner->irn           = outer->irn; \
421:     inner->jcn           = outer->jcn; \
422:     inner->a             = (mumpsscalar *)outer->a; \
423:     inner->nnz_loc       = outer->nnz_loc; \
424:     inner->irn_loc       = outer->irn_loc; \
425:     inner->jcn_loc       = outer->jcn_loc; \
426:     inner->a_loc         = (mumpsscalar *)outer->a_loc; \
427:     inner->blkptr        = outer->blkptr; \
428:     inner->blkvar        = outer->blkvar; \
429:     inner->perm_in       = outer->perm_in; \
430:     inner->rhs           = (mumpsscalar *)outer->rhs; \
431:     inner->redrhs        = (mumpsscalar *)outer->redrhs; \
432:     inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
433:     inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
434:     inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
435:     inner->irhs_sparse   = outer->irhs_sparse; \
436:     inner->irhs_ptr      = outer->irhs_ptr; \
437:     inner->isol_loc      = outer->isol_loc; \
438:     inner->irhs_loc      = outer->irhs_loc; \
439:     inner->nrhs          = outer->nrhs; \
440:     inner->lrhs          = outer->lrhs; \
441:     inner->lredrhs       = outer->lredrhs; \
442:     inner->nz_rhs        = outer->nz_rhs; \
443:     inner->lsol_loc      = outer->lsol_loc; \
444:     inner->nloc_rhs      = outer->nloc_rhs; \
445:     inner->lrhs_loc      = outer->lrhs_loc; \
446:     inner->schur_lld     = outer->schur_lld; \
447:     inner->size_schur    = outer->size_schur; \
448:     inner->listvar_schur = outer->listvar_schur; \
449:     inner->schur         = (mumpsscalar *)outer->schur; \
450:   } while (0)

452: #define PostMumpsCall(inner, outer) \
453:   do { \
454:     outer->pivnul_list = inner->pivnul_list; \
455:     outer->mapping     = inner->mapping; \
456:   } while (0)

458: // Entry point for PETSc to call MUMPS
459: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
460: {
461:   PetscFunctionBegin;
462: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
463:   #if defined(PETSC_USE_COMPLEX)
464:   if (outer->precision == PETSC_PRECISION_SINGLE) {
465:     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
466:     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
467:     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
468:     PostMumpsCall(inner, outer);
469:   } else {
470:     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
471:     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
472:     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
473:     PostMumpsCall(inner, outer);
474:   }
475:   #else
476:   if (outer->precision == PETSC_PRECISION_SINGLE) {
477:     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
478:     PreMumpsCall(inner, outer, SMUMPS_REAL);
479:     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
480:     PostMumpsCall(inner, outer);
481:   } else {
482:     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
483:     PreMumpsCall(inner, outer, DMUMPS_REAL);
484:     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
485:     PostMumpsCall(inner, outer);
486:   }
487:   #endif
488: #else
489:   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
490:   PreMumpsCall(inner, outer, MumpsScalar);
491:   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
492:   PostMumpsCall(inner, outer);
493: #endif
494:   PetscFunctionReturn(PETSC_SUCCESS);
495: }

497: /* macros s.t. indices match MUMPS documentation */
498: #define ICNTL(I) icntl[(I) - 1]
499: #define INFOG(I) infog[(I) - 1]
500: #define INFO(I)  info[(I) - 1]

502: // Get a value from a MumpsScalar array, which is the <F> field in the MUMPS_STRUC_C struct. The value is convertible to PetscScalar. Note no minus 1 on I!
503: #if defined(PETSC_USE_COMPLEX)
504:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
505: #else
506:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
507: #endif

509: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
510: #define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
511: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
512: #define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

514: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
515: #define ID_CNTL_SET(ID, I, VAL) \
516:   do { \
517:     if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
518:     else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
519:   } while (0)
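// Illustrative sketch (not part of the upstream mumps.c): how the accessor macros above are used.
// ICNTL()/CNTL() indices follow the 1-based numbering of the MUMPS documentation; the values are placeholders.
//
//   id.ICNTL(4) = 0;                        // MUMPS_INT array: direct access through the aliased pointer
//   ID_CNTL_SET(id, 1, (PetscReal)1e-6);    // MumpsReal array: must go through the precision-aware setter
//   PetscReal cntl1 = ID_CNTL_GET(id, 1);   // and getter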

521: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
522: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
523:   #define PetscMUMPS_c(mumps) \
524:     do { \
525:       if (mumps->use_petsc_omp_support) { \
526:         if (mumps->is_omp_master) { \
527:           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
528:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
529:           PetscCall(PetscCallMumps_Private(&mumps->id)); \
530:           PetscCall(PetscFPTrapPop()); \
531:           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
532:         } \
533:         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
534:         /* Global info is the same on all processes so we Bcast it within omp_comm. Local info is specific \
535:          to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
536:          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
537:          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
538:       */ \
539:         SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
540:         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
541:         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
542:         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
543:         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
544:       } else { \
545:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
546:         PetscCall(PetscCallMumps_Private(&mumps->id)); \
547:         PetscCall(PetscFPTrapPop()); \
548:       } \
549:     } while (0)
550: #else
551:   #define PetscMUMPS_c(mumps) \
552:     do { \
553:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
554:       PetscCall(PetscCallMumps_Private(&mumps->id)); \
555:       PetscCall(PetscFPTrapPop()); \
556:     } while (0)
557: #endif
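// Illustrative sketch (not part of the upstream mumps.c): the usual calling pattern in the rest of this
// file is to fill the relevant fields of mumps->id (Mat_MUMPS is defined just below), set the job, invoke
// PetscMUMPS_c(), and then check INFOG(1) for errors:
//
//   mumps->id.job = JOB_FACTNUMERIC;
//   PetscMUMPS_c(mumps);
//   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));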

559: typedef struct Mat_MUMPS Mat_MUMPS;
560: struct Mat_MUMPS {
561:   XMUMPS_STRUC_C id;

563:   MatStructure   matstruc;
564:   PetscMPIInt    myid, petsc_size;
565:   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
566:   PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
567:   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
568:   PetscMUMPSInt  sym;
569:   MPI_Comm       mumps_comm;
570:   PetscMUMPSInt *ICNTL_pre;
571:   PetscReal     *CNTL_pre;
572:   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
573:   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
574:   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
575:   PetscMUMPSInt  ICNTL26;
576:   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
577: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
578:   PetscInt    *rhs_nrow, max_nrhs;
579:   PetscMPIInt *rhs_recvcounts, *rhs_disps;
580:   PetscScalar *rhs_loc, *rhs_recvbuf;
581: #endif
582:   Vec            b_seq, x_seq;
583:   PetscInt       ninfo, *info; /* which INFO to display */
584:   PetscInt       sizeredrhs;
585:   PetscScalar   *schur_sol;
586:   PetscInt       schur_sizesol;
587:   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
588:   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
589:   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
590:   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

592:   /* Support for MATNEST */
593:   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
594:   PetscCount  *nest_vals_start;
595:   PetscScalar *nest_vals;

597:   /* stuff used by petsc/mumps OpenMP support*/
598:   PetscBool    use_petsc_omp_support;
599:   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
600:   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
601:   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
602:   PetscMPIInt  tag, omp_comm_size;
603:   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
604:   MPI_Request *reqs;
605: };

607: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
608:    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
609:  */
610: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
611: {
612:   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

614:   PetscFunctionBegin;
615: #if defined(PETSC_USE_64BIT_INDICES)
616:   {
617:     PetscInt i;
618:     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
619:       PetscCall(PetscFree(mumps->ia_alloc));
620:       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
621:       mumps->cur_ilen = nrow + 1;
622:     }
623:     if (nnz > mumps->cur_jlen) {
624:       PetscCall(PetscFree(mumps->ja_alloc));
625:       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
626:       mumps->cur_jlen = nnz;
627:     }
628:     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
629:     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
630:     *ia_mumps = mumps->ia_alloc;
631:     *ja_mumps = mumps->ja_alloc;
632:   }
633: #else
634:   *ia_mumps = ia;
635:   *ja_mumps = ja;
636: #endif
637:   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
638:   PetscFunctionReturn(PETSC_SUCCESS);
639: }
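// Illustrative sketch (not part of the upstream mumps.c): PetscMUMPSIntCSRCast() is used when passing a
// 1-based CSR right-hand side to MUMPS; with 32-bit PetscInt it only aliases, with 64-bit PetscInt it
// copies into mumps->ia_alloc/ja_alloc. Variable names below are hypothetical.
//
//   PetscMUMPSInt *ia_mumps, *ja_mumps, nz_mumps;
//   PetscCall(PetscMUMPSIntCSRCast(mumps, nrow, ia, ja, &ia_mumps, &ja_mumps, &nz_mumps));
//   mumps->id.irhs_ptr    = ia_mumps;
//   mumps->id.irhs_sparse = ja_mumps;
//   mumps->id.nz_rhs      = nz_mumps;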

641: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
642: {
643:   PetscFunctionBegin;
644:   PetscCall(PetscFree(mumps->id.listvar_schur));
645:   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
646:   PetscCall(PetscFree(mumps->schur_sol));
647:   mumps->id.size_schur = 0;
648:   mumps->id.schur_lld  = 0;
649:   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes the inner id is not yet built
650:   PetscFunctionReturn(PETSC_SUCCESS);
651: }

653: /* solve with rhs in mumps->id.redrhs and return in the same location */
654: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
655: {
656:   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
657:   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
658:   MatFactorSchurStatus schurstatus;
659:   PetscInt             sizesol;
660:   const PetscScalar   *xarray;

662:   PetscFunctionBegin;
663:   PetscCall(MatFactorFactorizeSchurComplement(F));
664:   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
665:   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

667:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
668:   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
669: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
670:   PetscCall(MatBindToCPU(B, S->boundtocpu));
671: #endif
672:   switch (schurstatus) {
673:   case MAT_FACTOR_SCHUR_FACTORED:
674:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
675:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
676: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
677:     PetscCall(MatBindToCPU(X, S->boundtocpu));
678: #endif
679:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
680:       PetscCall(MatMatSolveTranspose(S, B, X));
681:     } else {
682:       PetscCall(MatMatSolve(S, B, X));
683:     }
684:     break;
685:   case MAT_FACTOR_SCHUR_INVERTED:
686:     sizesol = mumps->id.nrhs * mumps->id.size_schur;
687:     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
688:       PetscCall(PetscFree(mumps->schur_sol));
689:       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
690:       mumps->schur_sizesol = sizesol;
691:     }
692:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
693:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
694: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
695:     PetscCall(MatBindToCPU(X, S->boundtocpu));
696: #endif
697:     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
698:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
699:       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
700:     } else {
701:       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
702:     }
703:     PetscCall(MatProductSetFromOptions(X));
704:     PetscCall(MatProductSymbolic(X));
705:     PetscCall(MatProductNumeric(X));

707:     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
708:     break;
709:   default:
710:     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
711:   }
712:   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
713:   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
714:   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
715:   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
716:   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
717:   PetscCall(MatDestroy(&B));
718:   PetscCall(MatDestroy(&X));
719:   PetscFunctionReturn(PETSC_SUCCESS);
720: }

722: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
723: {
724:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

726:   PetscFunctionBegin;
727:   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
728:     PetscFunctionReturn(PETSC_SUCCESS);
729:   }
730:   if (!expansion) { /* prepare for the condensation step */
731:     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
732:     /* allocate MUMPS internal array to store reduced right-hand sides */
733:     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
734:       mumps->id.lredrhs = mumps->id.size_schur;
735:       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
736:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
737:       PetscCall(PetscFree(mumps->redrhs));
738:       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
739:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
740:     }
741:   } else {                                    /* prepare for the expansion step */
742:     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
743:     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
744:     PetscMUMPS_c(mumps);
745:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
746:     /* restore defaults */
747:     mumps->id.ICNTL(26) = -1;
748:     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
749:     if (mumps->id.nrhs > 1) {
750:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
751:       PetscCall(PetscFree(mumps->redrhs));
752:       mumps->id.redrhs_len = 0;
753:       mumps->id.lredrhs    = 0;
754:       mumps->sizeredrhs    = 0;
755:     }
756:   }
757:   PetscFunctionReturn(PETSC_SUCCESS);
758: }
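// Illustrative sketch (not part of the upstream mumps.c): callers bracket a MUMPS solve with the two
// modes of the helper above whenever a Schur complement is in use (ICNTL(19) != 0):
//
//   PetscCall(MatMumpsHandleSchur_Private(F, PETSC_FALSE)); // condensation: (re)allocate id.redrhs
//   ... MUMPS solve that leaves the reduced right-hand side in id.redrhs ...
//   PetscCall(MatMumpsHandleSchur_Private(F, PETSC_TRUE));  // expansion: solve the Schur system, then run the ICNTL(26)=2 phase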

760: /*
761:   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

763:   input:
764:     A       - matrix in aij, baij, or sbaij format
765:     shift   - 0: C style output triple; 1: Fortran style output triple.
766:     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
767:               MAT_REUSE_MATRIX:   only the values in v array are updated
768:   output:
769:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
770:     r, c, v - row and col index, matrix values (matrix triples)

772:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
773:   freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
774:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

776:  */
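/*
  Illustrative example (not part of the upstream mumps.c): with shift = 1 (Fortran-style indexing), the
  2x2 sequential AIJ matrix

     [ 2  -1 ]
     [ 0   3 ]

  is converted by MatConvertToTriples_seqaij_seqaij() below into the centralized triples

     irn = {1, 1, 2},  jcn = {1, 2, 2},  val = {2, -1, 3},  nnz = 3
*/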

778: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
779: {
780:   const PetscScalar *av;
781:   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
782:   PetscCount         nz, rnz, k;
783:   PetscMUMPSInt     *row, *col;
784:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

786:   PetscFunctionBegin;
787:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
788:   if (reuse == MAT_INITIAL_MATRIX) {
789:     nz = aa->nz;
790:     ai = aa->i;
791:     aj = aa->j;
792:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
793:     for (PetscCount i = k = 0; i < M; i++) {
794:       rnz = ai[i + 1] - ai[i];
795:       ajj = aj + ai[i];
796:       for (PetscCount j = 0; j < rnz; j++) {
797:         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
798:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
799:         k++;
800:       }
801:     }
802:     mumps->val = (PetscScalar *)av;
803:     mumps->irn = row;
804:     mumps->jcn = col;
805:     mumps->nnz = nz;
806:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
807:   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
808:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
809:   PetscFunctionReturn(PETSC_SUCCESS);
810: }

812: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
813: {
814:   PetscCount     nz, i, j, k, r;
815:   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
816:   PetscMUMPSInt *row, *col;

818:   PetscFunctionBegin;
819:   nz = a->sliidx[a->totalslices];
820:   if (reuse == MAT_INITIAL_MATRIX) {
821:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
822:     for (i = k = 0; i < a->totalslices; i++) {
823:       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
824:     }
825:     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
826:     mumps->irn = row;
827:     mumps->jcn = col;
828:     mumps->nnz = nz;
829:     mumps->val = a->val;
830:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
831:   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
832:   PetscFunctionReturn(PETSC_SUCCESS);
833: }

835: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
836: {
837:   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
838:   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
839:   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
840:   PetscInt        bs;
841:   PetscMUMPSInt  *row, *col;

843:   PetscFunctionBegin;
844:   if (reuse == MAT_INITIAL_MATRIX) {
845:     PetscCall(MatGetBlockSize(A, &bs));
846:     M  = A->rmap->N / bs;
847:     ai = aa->i;
848:     aj = aa->j;
849:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
850:     for (i = 0; i < M; i++) {
851:       ajj = aj + ai[i];
852:       rnz = ai[i + 1] - ai[i];
853:       for (k = 0; k < rnz; k++) {
854:         for (j = 0; j < bs; j++) {
855:           for (m = 0; m < bs; m++) {
856:             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
857:             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
858:             idx++;
859:           }
860:         }
861:       }
862:     }
863:     mumps->irn = row;
864:     mumps->jcn = col;
865:     mumps->nnz = nz;
866:     mumps->val = aa->a;
867:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
868:   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
869:   PetscFunctionReturn(PETSC_SUCCESS);
870: }

872: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
873: {
874:   const PetscInt *ai, *aj, *ajj;
875:   PetscInt        bs;
876:   PetscCount      nz, rnz, i, j, k, m;
877:   PetscMUMPSInt  *row, *col;
878:   PetscScalar    *val;
879:   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
880:   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
881: #if defined(PETSC_USE_COMPLEX)
882:   PetscBool isset, hermitian;
883: #endif

885:   PetscFunctionBegin;
886: #if defined(PETSC_USE_COMPLEX)
887:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
888:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
889: #endif
890:   ai = aa->i;
891:   aj = aa->j;
892:   PetscCall(MatGetBlockSize(A, &bs));
893:   if (reuse == MAT_INITIAL_MATRIX) {
894:     const PetscCount alloc_size = aa->nz * bs2;

896:     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
897:     if (bs > 1) {
898:       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
899:       mumps->val = mumps->val_alloc;
900:     } else {
901:       mumps->val = aa->a;
902:     }
903:     mumps->irn = row;
904:     mumps->jcn = col;
905:   } else {
906:     row = mumps->irn;
907:     col = mumps->jcn;
908:   }
909:   val = mumps->val;

911:   nz = 0;
912:   if (bs > 1) {
913:     for (i = 0; i < mbs; i++) {
914:       rnz = ai[i + 1] - ai[i];
915:       ajj = aj + ai[i];
916:       for (j = 0; j < rnz; j++) {
917:         for (k = 0; k < bs; k++) {
918:           for (m = 0; m < bs; m++) {
919:             if (ajj[j] > i || k >= m) {
920:               if (reuse == MAT_INITIAL_MATRIX) {
921:                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
922:                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
923:               }
924:               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
925:             }
926:           }
927:         }
928:       }
929:     }
930:   } else if (reuse == MAT_INITIAL_MATRIX) {
931:     for (i = 0; i < mbs; i++) {
932:       rnz = ai[i + 1] - ai[i];
933:       ajj = aj + ai[i];
934:       for (j = 0; j < rnz; j++) {
935:         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
936:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
937:         nz++;
938:       }
939:     }
940:     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
941:   } else if (mumps->nest_vals)
942:     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
943:   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
944:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
945:   PetscFunctionReturn(PETSC_SUCCESS);
946: }

948: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
949: {
950:   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
951:   PetscCount         nz, rnz, i, j;
952:   const PetscScalar *av, *v1;
953:   PetscScalar       *val;
954:   PetscMUMPSInt     *row, *col;
955:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
956:   PetscBool          missing;
957: #if defined(PETSC_USE_COMPLEX)
958:   PetscBool hermitian, isset;
959: #endif

961:   PetscFunctionBegin;
962: #if defined(PETSC_USE_COMPLEX)
963:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
964:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
965: #endif
966:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
967:   ai    = aa->i;
968:   aj    = aa->j;
969:   adiag = aa->diag;
970:   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
971:   if (reuse == MAT_INITIAL_MATRIX) {
972:     /* count nz in the upper triangular part of A */
973:     nz = 0;
974:     if (missing) {
975:       for (i = 0; i < M; i++) {
976:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
977:           for (j = ai[i]; j < ai[i + 1]; j++) {
978:             if (aj[j] < i) continue;
979:             nz++;
980:           }
981:         } else {
982:           nz += ai[i + 1] - adiag[i];
983:         }
984:       }
985:     } else {
986:       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
987:     }
988:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
989:     PetscCall(PetscMalloc1(nz, &val));
990:     mumps->nnz = nz;
991:     mumps->irn = row;
992:     mumps->jcn = col;
993:     mumps->val = mumps->val_alloc = val;

995:     nz = 0;
996:     if (missing) {
997:       for (i = 0; i < M; i++) {
998:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
999:           for (j = ai[i]; j < ai[i + 1]; j++) {
1000:             if (aj[j] < i) continue;
1001:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1002:             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1003:             val[nz] = av[j];
1004:             nz++;
1005:           }
1006:         } else {
1007:           rnz = ai[i + 1] - adiag[i];
1008:           ajj = aj + adiag[i];
1009:           v1  = av + adiag[i];
1010:           for (j = 0; j < rnz; j++) {
1011:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1012:             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1013:             val[nz++] = v1[j];
1014:           }
1015:         }
1016:       }
1017:     } else {
1018:       for (i = 0; i < M; i++) {
1019:         rnz = ai[i + 1] - adiag[i];
1020:         ajj = aj + adiag[i];
1021:         v1  = av + adiag[i];
1022:         for (j = 0; j < rnz; j++) {
1023:           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1024:           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1025:           val[nz++] = v1[j];
1026:         }
1027:       }
1028:     }
1029:   } else {
1030:     nz  = 0;
1031:     val = mumps->val;
1032:     if (missing) {
1033:       for (i = 0; i < M; i++) {
1034:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1035:           for (j = ai[i]; j < ai[i + 1]; j++) {
1036:             if (aj[j] < i) continue;
1037:             val[nz++] = av[j];
1038:           }
1039:         } else {
1040:           rnz = ai[i + 1] - adiag[i];
1041:           v1  = av + adiag[i];
1042:           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1043:         }
1044:       }
1045:     } else {
1046:       for (i = 0; i < M; i++) {
1047:         rnz = ai[i + 1] - adiag[i];
1048:         v1  = av + adiag[i];
1049:         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1050:       }
1051:     }
1052:   }
1053:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1054:   PetscFunctionReturn(PETSC_SUCCESS);
1055: }

1057: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1058: {
1059:   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1060:   PetscInt           bs;
1061:   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1062:   PetscMUMPSInt     *row, *col;
1063:   const PetscScalar *av, *bv, *v1, *v2;
1064:   PetscScalar       *val;
1065:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1066:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1067:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1068:   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1069: #if defined(PETSC_USE_COMPLEX)
1070:   PetscBool hermitian, isset;
1071: #endif

1073:   PetscFunctionBegin;
1074: #if defined(PETSC_USE_COMPLEX)
1075:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1076:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1077: #endif
1078:   PetscCall(MatGetBlockSize(A, &bs));
1079:   rstart = A->rmap->rstart;
1080:   ai     = aa->i;
1081:   aj     = aa->j;
1082:   bi     = bb->i;
1083:   bj     = bb->j;
1084:   av     = aa->a;
1085:   bv     = bb->a;

1087:   garray = mat->garray;

1089:   if (reuse == MAT_INITIAL_MATRIX) {
1090:     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1091:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1092:     PetscCall(PetscMalloc1(nz, &val));
1093:     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1094:     mumps->irn = row;
1095:     mumps->jcn = col;
1096:     mumps->val = mumps->val_alloc = val;
1097:   } else {
1098:     val = mumps->val;
1099:   }

1101:   jj   = 0;
1102:   irow = rstart;
1103:   for (i = 0; i < mbs; i++) {
1104:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1105:     countA = ai[i + 1] - ai[i];
1106:     countB = bi[i + 1] - bi[i];
1107:     bjj    = bj + bi[i];
1108:     v1     = av + ai[i] * bs2;
1109:     v2     = bv + bi[i] * bs2;

1111:     if (bs > 1) {
1112:       /* A-part */
1113:       for (j = 0; j < countA; j++) {
1114:         for (k = 0; k < bs; k++) {
1115:           for (m = 0; m < bs; m++) {
1116:             if (rstart + ajj[j] * bs > irow || k >= m) {
1117:               if (reuse == MAT_INITIAL_MATRIX) {
1118:                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1119:                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1120:               }
1121:               val[jj++] = v1[j * bs2 + m + k * bs];
1122:             }
1123:           }
1124:         }
1125:       }

1127:       /* B-part */
1128:       for (j = 0; j < countB; j++) {
1129:         for (k = 0; k < bs; k++) {
1130:           for (m = 0; m < bs; m++) {
1131:             if (reuse == MAT_INITIAL_MATRIX) {
1132:               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1133:               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1134:             }
1135:             val[jj++] = v2[j * bs2 + m + k * bs];
1136:           }
1137:         }
1138:       }
1139:     } else {
1140:       /* A-part */
1141:       for (j = 0; j < countA; j++) {
1142:         if (reuse == MAT_INITIAL_MATRIX) {
1143:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1144:           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1145:         }
1146:         val[jj++] = v1[j];
1147:       }

1149:       /* B-part */
1150:       for (j = 0; j < countB; j++) {
1151:         if (reuse == MAT_INITIAL_MATRIX) {
1152:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1153:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1154:         }
1155:         val[jj++] = v2[j];
1156:       }
1157:     }
1158:     irow += bs;
1159:   }
1160:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1161:   PetscFunctionReturn(PETSC_SUCCESS);
1162: }

1164: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1165: {
1166:   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1167:   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1168:   PetscMUMPSInt     *row, *col;
1169:   const PetscScalar *av, *bv, *v1, *v2;
1170:   PetscScalar       *val;
1171:   Mat                Ad, Ao;
1172:   Mat_SeqAIJ        *aa;
1173:   Mat_SeqAIJ        *bb;

1175:   PetscFunctionBegin;
1176:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1177:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1178:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1180:   aa = (Mat_SeqAIJ *)Ad->data;
1181:   bb = (Mat_SeqAIJ *)Ao->data;
1182:   ai = aa->i;
1183:   aj = aa->j;
1184:   bi = bb->i;
1185:   bj = bb->j;

1187:   rstart = A->rmap->rstart;
1188:   cstart = A->cmap->rstart;

1190:   if (reuse == MAT_INITIAL_MATRIX) {
1191:     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1192:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1193:     PetscCall(PetscMalloc1(nz, &val));
1194:     mumps->nnz = nz;
1195:     mumps->irn = row;
1196:     mumps->jcn = col;
1197:     mumps->val = mumps->val_alloc = val;
1198:   } else {
1199:     val = mumps->val;
1200:   }

1202:   jj   = 0;
1203:   irow = rstart;
1204:   for (i = 0; i < m; i++) {
1205:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1206:     countA = ai[i + 1] - ai[i];
1207:     countB = bi[i + 1] - bi[i];
1208:     bjj    = bj + bi[i];
1209:     v1     = av + ai[i];
1210:     v2     = bv + bi[i];

1212:     /* A-part */
1213:     for (j = 0; j < countA; j++) {
1214:       if (reuse == MAT_INITIAL_MATRIX) {
1215:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1216:         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1217:       }
1218:       val[jj++] = v1[j];
1219:     }

1221:     /* B-part */
1222:     for (j = 0; j < countB; j++) {
1223:       if (reuse == MAT_INITIAL_MATRIX) {
1224:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1225:         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1226:       }
1227:       val[jj++] = v2[j];
1228:     }
1229:     irow++;
1230:   }
1231:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1232:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1233:   PetscFunctionReturn(PETSC_SUCCESS);
1234: }

1236: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1237: {
1238:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1239:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1240:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1241:   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1242:   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1243:   const PetscInt     bs2 = mat->bs2;
1244:   PetscInt           bs;
1245:   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1246:   PetscMUMPSInt     *row, *col;
1247:   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1248:   PetscScalar       *val;

1250:   PetscFunctionBegin;
1251:   PetscCall(MatGetBlockSize(A, &bs));
1252:   if (reuse == MAT_INITIAL_MATRIX) {
1253:     nz = bs2 * (aa->nz + bb->nz);
1254:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1255:     PetscCall(PetscMalloc1(nz, &val));
1256:     mumps->nnz = nz;
1257:     mumps->irn = row;
1258:     mumps->jcn = col;
1259:     mumps->val = mumps->val_alloc = val;
1260:   } else {
1261:     val = mumps->val;
1262:   }

1264:   jj   = 0;
1265:   irow = rstart;
1266:   for (i = 0; i < mbs; i++) {
1267:     countA = ai[i + 1] - ai[i];
1268:     countB = bi[i + 1] - bi[i];
1269:     ajj    = aj + ai[i];
1270:     bjj    = bj + bi[i];
1271:     v1     = av + bs2 * ai[i];
1272:     v2     = bv + bs2 * bi[i];

1274:     idx = 0;
1275:     /* A-part */
1276:     for (k = 0; k < countA; k++) {
1277:       for (j = 0; j < bs; j++) {
1278:         for (n = 0; n < bs; n++) {
1279:           if (reuse == MAT_INITIAL_MATRIX) {
1280:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1281:             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1282:           }
1283:           val[jj++] = v1[idx++];
1284:         }
1285:       }
1286:     }

1288:     idx = 0;
1289:     /* B-part */
1290:     for (k = 0; k < countB; k++) {
1291:       for (j = 0; j < bs; j++) {
1292:         for (n = 0; n < bs; n++) {
1293:           if (reuse == MAT_INITIAL_MATRIX) {
1294:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1295:             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1296:           }
1297:           val[jj++] = v2[idx++];
1298:         }
1299:       }
1300:     }
1301:     irow += bs;
1302:   }
1303:   PetscFunctionReturn(PETSC_SUCCESS);
1304: }

1306: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1307: {
1308:   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1309:   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1310:   PetscMUMPSInt     *row, *col;
1311:   const PetscScalar *av, *bv, *v1, *v2;
1312:   PetscScalar       *val;
1313:   Mat                Ad, Ao;
1314:   Mat_SeqAIJ        *aa;
1315:   Mat_SeqAIJ        *bb;
1316: #if defined(PETSC_USE_COMPLEX)
1317:   PetscBool hermitian, isset;
1318: #endif

1320:   PetscFunctionBegin;
1321: #if defined(PETSC_USE_COMPLEX)
1322:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1323:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1324: #endif
1325:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1326:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1327:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1329:   aa    = (Mat_SeqAIJ *)Ad->data;
1330:   bb    = (Mat_SeqAIJ *)Ao->data;
1331:   ai    = aa->i;
1332:   aj    = aa->j;
1333:   adiag = aa->diag;
1334:   bi    = bb->i;
1335:   bj    = bb->j;

1337:   rstart = A->rmap->rstart;

1339:   if (reuse == MAT_INITIAL_MATRIX) {
1340:     nza = 0; /* num of upper triangular entries in the diagonal block Ad, including diagonals */
1341:     nzb = 0; /* num of upper triangular entries in the off-diagonal block Ao */
1342:     for (i = 0; i < m; i++) {
1343:       nza += (ai[i + 1] - adiag[i]);
1344:       countB = bi[i + 1] - bi[i];
1345:       bjj    = bj + bi[i];
1346:       for (j = 0; j < countB; j++) {
1347:         if (garray[bjj[j]] > rstart) nzb++;
1348:       }
1349:     }

1351:     nz = nza + nzb; /* total nz of upper triangular part of mat */
1352:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1353:     PetscCall(PetscMalloc1(nz, &val));
1354:     mumps->nnz = nz;
1355:     mumps->irn = row;
1356:     mumps->jcn = col;
1357:     mumps->val = mumps->val_alloc = val;
1358:   } else {
1359:     val = mumps->val;
1360:   }

1362:   jj   = 0;
1363:   irow = rstart;
1364:   for (i = 0; i < m; i++) {
1365:     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1366:     v1     = av + adiag[i];
1367:     countA = ai[i + 1] - adiag[i];
1368:     countB = bi[i + 1] - bi[i];
1369:     bjj    = bj + bi[i];
1370:     v2     = bv + bi[i];

1372:     /* A-part */
1373:     for (j = 0; j < countA; j++) {
1374:       if (reuse == MAT_INITIAL_MATRIX) {
1375:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1376:         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1377:       }
1378:       val[jj++] = v1[j];
1379:     }

1381:     /* B-part */
1382:     for (j = 0; j < countB; j++) {
1383:       if (garray[bjj[j]] > rstart) {
1384:         if (reuse == MAT_INITIAL_MATRIX) {
1385:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1386:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1387:         }
1388:         val[jj++] = v2[j];
1389:       }
1390:     }
1391:     irow++;
1392:   }
1393:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1394:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1395:   PetscFunctionReturn(PETSC_SUCCESS);
1396: }
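
/* Editor's note (worked example, not part of the original source): for the Cholesky/SBAIJ path the
   routine above only emits the upper triangular part: the A-part starts at adiag[i] (the diagonal)
   instead of ai[i], and B-part entries are kept only when garray[bjj[j]] > rstart, i.e. when the
   global column lies to the right of the local diagonal block. Reusing the rows of the AIJ example
   above (rstart = 4, shift = 1, the global matrix assumed symmetric), row 4 contributes A(4,4) and
   A(4,6) and row 5 contributes A(5,5), giving
       irn = {5, 5, 6},  jcn = {5, 7, 6},  val = {2, 1, 3}
   while any B-part entry whose global column falls below rstart would be dropped as strictly lower
   triangular. */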

1398: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1399: {
1400:   const PetscScalar *av;
1401:   const PetscInt     M = A->rmap->n;
1402:   PetscCount         i;
1403:   PetscMUMPSInt     *row, *col;
1404:   Vec                v;

1406:   PetscFunctionBegin;
1407:   PetscCall(MatDiagonalGetDiagonal(A, &v));
1408:   PetscCall(VecGetArrayRead(v, &av));
1409:   if (reuse == MAT_INITIAL_MATRIX) {
1410:     PetscCall(PetscMalloc2(M, &row, M, &col));
1411:     for (i = 0; i < M; i++) {
1412:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1413:       col[i] = row[i];
1414:     }
1415:     mumps->val = (PetscScalar *)av;
1416:     mumps->irn = row;
1417:     mumps->jcn = col;
1418:     mumps->nnz = M;
1419:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1420:   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1421:   PetscCall(VecRestoreArrayRead(v, &av));
1422:   PetscFunctionReturn(PETSC_SUCCESS);
1423: }

1425: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1426: {
1427:   PetscScalar   *v;
1428:   const PetscInt m = A->rmap->n, N = A->cmap->N;
1429:   PetscInt       lda;
1430:   PetscCount     i, j;
1431:   PetscMUMPSInt *row, *col;

1433:   PetscFunctionBegin;
1434:   PetscCall(MatDenseGetArray(A, &v));
1435:   PetscCall(MatDenseGetLDA(A, &lda));
1436:   if (reuse == MAT_INITIAL_MATRIX) {
1437:     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1438:     for (i = 0; i < m; i++) {
1439:       col[i] = 0;
1440:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1441:     }
1442:     for (j = 1; j < N; j++) {
1443:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1444:       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1445:     }
1446:     if (lda == m) mumps->val = v;
1447:     else {
1448:       PetscCall(PetscMalloc1(m * N, &mumps->val));
1449:       mumps->val_alloc = mumps->val;
1450:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1451:     }
1452:     mumps->irn = row;
1453:     mumps->jcn = col;
1454:     mumps->nnz = m * N;
1455:   } else {
1456:     if (lda == m && !mumps->nest_vals) mumps->val = v;
1457:     else {
1458:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1459:     }
1460:   }
1461:   PetscCall(MatDenseRestoreArray(A, &v));
1462:   PetscFunctionReturn(PETSC_SUCCESS);
1463: }
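
/* Editor's note (worked example, not part of the original source): the dense conversion above
   enumerates the local block column by column (the shift argument is intentionally unused here).
   With m = 2 local rows, N = 3 global columns and rstart = 4 it produces
       irn = {4, 5, 4, 5, 4, 5},  jcn = {0, 0, 1, 1, 2, 2}
   and val aliases the column-major local array directly when lda == m, or a packed copy of it
   otherwise. */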

1465: // If the input Mat (sub) is a MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function repeatedly retrieves the parent Mat until the result is no
1466: // longer one of those types, and returns the accumulated shift and scaling, whether the parent Mat's entries should be conjugated, and whether its rows
1467: // and columns should be swapped
1468: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1469: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1470: {
1471:   Mat         A;
1472:   PetscScalar s[2];
1473:   PetscBool   isTrans, isHTrans, compare;

1475:   PetscFunctionBegin;
1476:   do {
1477:     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1478:     if (isTrans) {
1479:       PetscCall(MatTransposeGetMat(*sub, &A));
1480:       isHTrans = PETSC_FALSE;
1481:     } else {
1482:       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1483:       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1484:     }
1485:     compare = (PetscBool)(isTrans || isHTrans);
1486:     if (compare) {
1487:       if (vshift && vscale) {
1488:         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1489:         if (!*conjugate) {
1490:           *vshift += s[0] * *vscale;
1491:           *vscale *= s[1];
1492:         } else {
1493:           *vshift += PetscConj(s[0]) * *vscale;
1494:           *vscale *= PetscConj(s[1]);
1495:         }
1496:       }
1497:       if (swap) *swap = (PetscBool)!*swap;
1498:       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1499:       *sub = A;
1500:     }
1501:   } while (compare);
1502:   PetscFunctionReturn(PETSC_SUCCESS);
1503: }
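
/* Editor's note (not part of the original source): a minimal worked case of the unwrapping above.
   If sub = a*A^H + b*I, where A^H is a MATHERMITIANTRANSPOSEVIRTUAL wrapper of A and a, b are the
   scale and shift recorded in the underlying shell, a single pass of the loop returns the parent A
   together with
       swap = PETSC_TRUE, conjugate = PETSC_TRUE, vscale = a, vshift = b
   so a caller can rebuild sub's entries as a*conj(A(j,i)) from A's triples by swapping row/column
   indices and conjugating/scaling the values; the callers in this file additionally require
   vshift == 0, since a diagonal shift cannot be represented on the unchanged sparsity pattern.
   Further nested wrappers keep folding into vshift/vscale, with the recorded scalars conjugated
   whenever the accumulated conjugate flag is already set. */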

1505: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1506: {
1507:   Mat     **mats;
1508:   PetscInt  nr, nc;
1509:   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

1511:   PetscFunctionBegin;
1512:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1513:   if (reuse == MAT_INITIAL_MATRIX) {
1514:     PetscMUMPSInt *irns, *jcns;
1515:     PetscScalar   *vals;
1516:     PetscCount     totnnz, cumnnz, maxnnz;
1517:     PetscInt      *pjcns_w, Mbs = 0;
1518:     IS            *rows, *cols;
1519:     PetscInt     **rows_idx, **cols_idx;

1521:     cumnnz = 0;
1522:     maxnnz = 0;
1523:     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1524:     for (PetscInt r = 0; r < nr; r++) {
1525:       for (PetscInt c = 0; c < nc; c++) {
1526:         Mat sub = mats[r][c];

1528:         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1529:         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1530:         if (sub) {
1531:           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1532:           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1533:           MatInfo   info;

1535:           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1536:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1537:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1538:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1539:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1540:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1541:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1542:           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1543:           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

1545:           if (chol) {
1546:             if (r == c) {
1547:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1548:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1549:               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1550:               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1551:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1552:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1553:             } else {
1554:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1555:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1556:               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1557:               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1558:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560:             }
1561:           } else {
1562:             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1563:             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1564:             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1565:             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1566:             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1567:             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1568:           }
1569:           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1570:           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1571:           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1572:           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1573:           maxnnz = PetscMax(maxnnz, info.nz_used);
1574:         }
1575:       }
1576:     }

1578:     /* Allocate total COO */
1579:     totnnz = cumnnz;
1580:     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1581:     PetscCall(PetscMalloc1(totnnz, &vals));

1583:     /* Handle rows and column maps
1584:        We directly map rows and use an SF for the columns */
1585:     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1586:     PetscCall(MatNestGetISs(A, rows, cols));
1587:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1588:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1589:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1590:     else (void)maxnnz;

1592:     cumnnz = 0;
1593:     for (PetscInt r = 0; r < nr; r++) {
1594:       for (PetscInt c = 0; c < nc; c++) {
1595:         Mat             sub    = mats[r][c];
1596:         const PetscInt *ridx   = rows_idx[r];
1597:         const PetscInt *cidx   = cols_idx[c];
1598:         PetscScalar     vscale = 1.0, vshift = 0.0;
1599:         PetscInt        rst, size, bs;
1600:         PetscSF         csf;
1601:         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1602:         PetscLayout     cmap;
1603:         PetscInt        innz;

1605:         mumps->nest_vals_start[r * nc + c] = cumnnz;
1606:         if (c == r) {
1607:           PetscCall(ISGetSize(rows[r], &size));
1608:           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1609:             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1610:           }
1611:           PetscCall(MatGetBlockSize(sub, &bs));
1612:           Mbs += size / bs;
1613:         }
1614:         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

1616:         /* Extract inner blocks if needed */
1617:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1618:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

1620:         /* Get column layout to map off-process columns */
1621:         PetscCall(MatGetLayouts(sub, NULL, &cmap));

1623:         /* Get row start to map on-process rows */
1624:         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

1626:         /* Directly use the mumps data structure and use C ordering for now */
1627:         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

1629:         /* Swap the role of rows and columns indices for transposed blocks
1630:            since we need values with global final ordering */
1631:         if (swap) {
1632:           cidx = rows_idx[r];
1633:           ridx = cols_idx[c];
1634:         }

1636:         /* Communicate column indices
1637:            This could have been done with a single SF but it would have complicated the code a lot.
1638:            But since we do it only once, we pay the price of setting up an SF for each block */
1639:         if (PetscDefined(USE_64BIT_INDICES)) {
1640:           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1641:         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1642:         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1643:         PetscCall(PetscIntCast(mumps->nnz, &innz));
1644:         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1645:         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1646:         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1647:         PetscCall(PetscSFDestroy(&csf));

1649:         /* Import indices: use direct map for rows and mapped indices for columns */
1650:         if (swap) {
1651:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1652:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1653:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1654:           }
1655:         } else {
1656:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1657:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1658:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1659:           }
1660:         }

1662:         /* Import values to full COO */
1663:         if (conjugate) { /* conjugate the entries */
1664:           PetscScalar *v = vals + cumnnz;
1665:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1666:         } else if (vscale != 1.0) {
1667:           PetscScalar *v = vals + cumnnz;
1668:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1669:         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

1671:         /* Shift new starting point and sanity check */
1672:         cumnnz += mumps->nnz;
1673:         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

1675:         /* Free scratch memory */
1676:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1677:         PetscCall(PetscFree(mumps->val_alloc));
1678:         mumps->val = NULL;
1679:         mumps->nnz = 0;
1680:       }
1681:     }
1682:     if (mumps->id.ICNTL(15) == 1) {
1683:       if (Mbs != A->rmap->N) {
1684:         PetscMPIInt rank, size;

1686:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1687:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1688:         if (rank == 0) {
1689:           PetscInt shift = 0;

1691:           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1692:           PetscCall(PetscFree(mumps->id.blkptr));
1693:           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1694:           mumps->id.blkptr[0] = 1;
1695:           for (PetscInt i = 0; i < size; ++i) {
1696:             for (PetscInt r = 0; r < nr; r++) {
1697:               Mat             sub = mats[r][r];
1698:               const PetscInt *ranges;
1699:               PetscInt        bs;

1701:               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1702:               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1703:               PetscCall(MatGetBlockSize(sub, &bs));
1704:               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1705:               shift += (ranges[i + 1] - ranges[i]) / bs;
1706:             }
1707:           }
1708:         }
1709:       } else mumps->id.ICNTL(15) = 0;
1710:     }
1711:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1712:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1713:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1714:     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1715:     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1716:     mumps->nest_vals_start[nr * nc] = cumnnz;

1718:     /* Set pointers for final MUMPS data structure */
1719:     mumps->nest_vals = vals;
1720:     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1721:     mumps->val       = vals;
1722:     mumps->irn       = irns;
1723:     mumps->jcn       = jcns;
1724:     mumps->nnz       = cumnnz;
1725:   } else {
1726:     PetscScalar *oval = mumps->nest_vals;
1727:     for (PetscInt r = 0; r < nr; r++) {
1728:       for (PetscInt c = 0; c < nc; c++) {
1729:         PetscBool   conjugate = PETSC_FALSE;
1730:         Mat         sub       = mats[r][c];
1731:         PetscScalar vscale = 1.0, vshift = 0.0;
1732:         PetscInt    midx = r * nc + c;

1734:         if (!mumps->nest_convert_to_triples[midx]) continue;
1735:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1736:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1737:         mumps->val = oval + mumps->nest_vals_start[midx];
1738:         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1739:         if (conjugate) {
1740:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1741:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1742:         } else if (vscale != 1.0) {
1743:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1744:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1745:         }
1746:       }
1747:     }
1748:     mumps->val = oval;
1749:   }
1750:   PetscFunctionReturn(PETSC_SUCCESS);
1751: }
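
/* Editor's note (not part of the original source): a sketch of what the MATNEST conversion above
   builds for a 2x2 nest [[A00, A01], [NULL, A11]] in the LU (non-Cholesky) case. Each present
   block gets its own conversion routine in nest_convert_to_triples[] (NULL for the missing block)
   and is converted with shift 0; its row indices are then mapped to the global ordering through
   the nest row IS and the block's ownership offset, while its column indices are mapped through a
   PetscSF broadcast of the nest column IS. The triples of all blocks are concatenated, with
       nest_vals_start = {0, nnz(A00), nnz(A00) + nnz(A01), nnz(A00) + nnz(A01) + nnz(A11)}
   recording where each block starts, so that MAT_REUSE_MATRIX can refresh the values block by
   block without recomputing any indices. */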

1753: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1754: {
1755:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

1757:   PetscFunctionBegin;
1758:   PetscCall(PetscFree(mumps->id.isol_loc));
1759:   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1760:   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1761:   PetscCall(VecDestroy(&mumps->b_seq));
1762:   PetscCall(VecDestroy(&mumps->x_seq));
1763:   PetscCall(PetscFree(mumps->id.perm_in));
1764:   PetscCall(PetscFree(mumps->id.blkvar));
1765:   PetscCall(PetscFree(mumps->id.blkptr));
1766:   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1767:   PetscCall(PetscFree(mumps->val_alloc));
1768:   PetscCall(PetscFree(mumps->info));
1769:   PetscCall(PetscFree(mumps->ICNTL_pre));
1770:   PetscCall(PetscFree(mumps->CNTL_pre));
1771:   PetscCall(MatMumpsResetSchur_Private(mumps));
1772:   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1773:     mumps->id.job = JOB_END;
1774:     PetscMUMPS_c(mumps);
1775:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1776:     if (mumps->mumps_comm != MPI_COMM_NULL) {
1777:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1778:       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1779:     }
1780:   }
1781:   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1782: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1783:   if (mumps->use_petsc_omp_support) {
1784:     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1785:     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1786:     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1787:   }
1788: #endif
1789:   PetscCall(PetscFree(mumps->ia_alloc));
1790:   PetscCall(PetscFree(mumps->ja_alloc));
1791:   PetscCall(PetscFree(mumps->recvcount));
1792:   PetscCall(PetscFree(mumps->reqs));
1793:   PetscCall(PetscFree(mumps->irhs_loc));
1794:   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1795:   PetscCall(PetscFree(mumps->nest_vals));
1796:   PetscCall(PetscFree(A->data));

1798:   /* clear composed functions */
1799:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1800:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1801:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1802:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1803:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1804:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1805:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1806:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1807:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1808:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1809:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1810:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1811:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1812:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1813:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1814:   PetscFunctionReturn(PETSC_SUCCESS);
1815: }

1817: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1818: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1819: {
1820:   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1821:   const PetscMPIInt ompsize = mumps->omp_comm_size;
1822:   PetscInt          i, m, M, rstart;

1824:   PetscFunctionBegin;
1825:   PetscCall(MatGetSize(A, &M, NULL));
1826:   PetscCall(MatGetLocalSize(A, &m, NULL));
1827:   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1828:   if (ompsize == 1) {
1829:     if (!mumps->irhs_loc) {
1830:       mumps->nloc_rhs = (PetscMUMPSInt)m;
1831:       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1832:       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1833:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1834:     }
1835:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1836:   } else {
1837: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1838:     const PetscInt *ranges;
1839:     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1840:     MPI_Group       petsc_group, omp_group;
1841:     PetscScalar    *recvbuf = NULL;

1843:     if (mumps->is_omp_master) {
1844:       /* Lazily initialize the omp stuff for distributed rhs */
1845:       if (!mumps->irhs_loc) {
1846:         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1847:         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1848:         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1849:         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1850:         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1851:         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

1853:         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1854:         mumps->nloc_rhs = 0;
1855:         PetscCall(MatGetOwnershipRanges(A, &ranges));
1856:         for (j = 0; j < ompsize; j++) {
1857:           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1858:           mumps->nloc_rhs += mumps->rhs_nrow[j];
1859:         }
1860:         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1861:         for (j = k = 0; j < ompsize; j++) {
1862:           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1863:         }

1865:         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1866:         PetscCallMPI(MPI_Group_free(&petsc_group));
1867:         PetscCallMPI(MPI_Group_free(&omp_group));
1868:       }

1870:       /* Reallocate buffers when the current nrhs is larger than any previously encountered */
1871:       if (nrhs > mumps->max_nrhs) {
1872:         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1873:         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1874:         mumps->max_nrhs = nrhs;
1875:       }

1877:       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1878:       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1879:       mumps->rhs_disps[0] = 0;
1880:       for (j = 1; j < ompsize; j++) {
1881:         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1882:         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1883:       }
1884:       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1885:     }

1887:     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1888:     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

1890:     if (mumps->is_omp_master) {
1891:       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1892:         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1893:         for (j = 0; j < ompsize; j++) {
1894:           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1895:           dst                    = dstbase;
1896:           for (i = 0; i < nrhs; i++) {
1897:             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1898:             src += mumps->rhs_nrow[j];
1899:             dst += mumps->nloc_rhs;
1900:           }
1901:           dstbase += mumps->rhs_nrow[j];
1902:         }
1903:       }
1904:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1905:     }
1906: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1907:   }
1908:   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1909:   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1910:   mumps->id.lrhs_loc = mumps->nloc_rhs;
1911:   mumps->id.irhs_loc = mumps->irhs_loc;
1912:   PetscFunctionReturn(PETSC_SUCCESS);
1913: }
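
/* Editor's note (worked example, not part of the original source): with ompsize == 1, two MPI
   ranks and a 5x5 matrix split as rows {0,1,2} on rank 0 and rows {3,4} on rank 1, the routine
   above sets
       rank 0:  nloc_rhs = 3,  irhs_loc = {1, 2, 3}
       rank 1:  nloc_rhs = 2,  irhs_loc = {4, 5}
   i.e. the 1-based global indices of the locally owned rows, and points id.rhs_loc at the caller's
   local RHS array (converted to the MUMPS precision when it differs from PetscScalar). In the
   OpenMP case, the master rank of each omp_comm additionally gathers the RHS blocks of the other
   ranks in that omp_comm into rhs_loc via MPI_Gatherv before handing them to MUMPS. */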

1915: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1916: {
1917:   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1918:   const PetscScalar *barray = NULL;
1919:   PetscScalar       *array;
1920:   IS                 is_iden, is_petsc;
1921:   PetscInt           i;
1922:   PetscBool          second_solve = PETSC_FALSE;
1923:   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

1925:   PetscFunctionBegin;
1926:   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1927:                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1928:                                    &cite1));
1929:   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1930:                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1931:                                    &cite2));

1933:   PetscCall(VecFlag(x, A->factorerrortype));
1934:   if (A->factorerrortype) {
1935:     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1936:     PetscFunctionReturn(PETSC_SUCCESS);
1937:   }

1939:   mumps->id.nrhs = 1;
1940:   if (mumps->petsc_size > 1) {
1941:     if (mumps->ICNTL20 == 10) {
1942:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1943:       PetscCall(VecGetArrayRead(b, &barray));
1944:       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1945:     } else {
1946:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */
1947:       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1948:       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1949:       if (!mumps->myid) {
1950:         PetscCall(VecGetArray(mumps->b_seq, &array));
1951:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1952:       }
1953:     }
1954:   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1955:     mumps->id.ICNTL(20) = 0;
1956:     PetscCall(VecCopy(b, x));
1957:     PetscCall(VecGetArray(x, &array));
1958:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959:   }

1961:   /*
1962:      handle condensation step of Schur complement (if any)
1963:      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1964:      According to MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1965:      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1966:      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1967:   */
1968:   if (mumps->id.size_schur > 0) {
1969:     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1970:     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1971:       second_solve = PETSC_TRUE;
1972:       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1973:       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1974:     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1975:   }

1977:   mumps->id.job = JOB_SOLVE;
1978:   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1979:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

1981:   /* handle expansion step of Schur complement (if any) */
1982:   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1983:   else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
1984:     PetscCall(MatMumpsSolveSchur_Private(A));
1985:     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1986:   }

1988:   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1989:     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1990:       /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so recreate scat_sol */
1991:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1992:     }
1993:     if (!mumps->scat_sol) { /* create scatter scat_sol */
1994:       PetscInt *isol2_loc = NULL;
1995:       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1996:       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1997:       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1998:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1999:       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2000:       PetscCall(ISDestroy(&is_iden));
2001:       PetscCall(ISDestroy(&is_petsc));
2002:       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2003:     }

2005:     PetscScalar *xarray;
2006:     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2007:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2008:     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2009:     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2010:     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

2012:     if (mumps->ICNTL20 == 10) { // distributed RHS
2013:       PetscCall(VecRestoreArrayRead(b, &barray));
2014:     } else if (!mumps->myid) { // centralized RHS
2015:       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2016:     }
2017:   } else {
2018:     // id.rhs has the solution in mumps precision
2019:     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2020:     PetscCall(VecRestoreArray(x, &array));
2021:   }

2023:   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2024:   PetscFunctionReturn(PETSC_SUCCESS);
2025: }
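
/* Editor's note (usage sketch, not part of the original source): the solve above is normally
   reached through the KSP/PC interface rather than called directly. A minimal sketch, assuming
   petscksp.h is included and A, b, x are an assembled Mat and two conforming Vecs:

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));             // single direct solve, no Krylov iterations
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                     // or PCCHOLESKY for symmetric problems
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(KSPSetFromOptions(ksp));                  // picks up -mat_mumps_icntl_<n> and -mat_mumps_cntl_<n> options
     PetscCall(KSPSolve(ksp, b, x));                     // factorization + MatSolve_MUMPS()
     PetscCall(KSPDestroy(&ksp));

   The same effect is obtained on the command line with -ksp_type preonly -pc_type lu
   -pc_factor_mat_solver_type mumps. */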

2027: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2028: {
2029:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2030:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2032:   PetscFunctionBegin;
2033:   mumps->id.ICNTL(9) = 0;
2034:   PetscCall(MatSolve_MUMPS(A, b, x));
2035:   mumps->id.ICNTL(9) = value;
2036:   PetscFunctionReturn(PETSC_SUCCESS);
2037: }

2039: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2040: {
2041:   Mat                Bt = NULL;
2042:   PetscBool          denseX, denseB, flg, flgT;
2043:   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2044:   PetscInt           i, nrhs, M, nrhsM;
2045:   PetscScalar       *array;
2046:   const PetscScalar *barray;
2047:   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2048:   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2049:   PetscScalar       *sol_loc;
2050:   void              *sol_loc_save;
2051:   PetscCount         sol_loc_len_save;
2052:   IS                 is_to, is_from;
2053:   PetscInt           k, proc, j, m, myrstart;
2054:   const PetscInt    *rstart;
2055:   Vec                v_mpi, msol_loc;
2056:   VecScatter         scat_sol;
2057:   Vec                b_seq;
2058:   VecScatter         scat_rhs;
2059:   PetscScalar       *aa;
2060:   PetscInt           spnr, *ia, *ja;
2061:   Mat_MPIAIJ        *b = NULL;

2063:   PetscFunctionBegin;
2064:   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2065:   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

2067:   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

2069:   if (denseB) {
2070:     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2071:     mumps->id.ICNTL(20) = 0; /* dense RHS */
2072:   } else {                   /* sparse B */
2073:     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2074:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2075:     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2076:     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2077:     /* input B is transpose of actual RHS matrix,
2078:      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2079:     PetscCall(MatTransposeGetMat(B, &Bt));
2080:     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2081:   }

2083:   PetscCall(MatGetSize(B, &M, &nrhs));
2084:   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2085:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2086:   mumps->id.lrhs = (PetscMUMPSInt)M;

2088:   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2089:     PetscScalar *aa;
2090:     PetscInt     spnr, *ia, *ja;
2091:     PetscBool    second_solve = PETSC_FALSE;

2093:     PetscCall(MatDenseGetArray(X, &array));
2094:     if (denseB) {
2095:       /* copy B to X */
2096:       PetscCall(MatDenseGetArrayRead(B, &barray));
2097:       PetscCall(PetscArraycpy(array, barray, nrhsM));
2098:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2099:     } else { /* sparse B */
2100:       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2101:       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2102:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2103:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2104:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2105:     }
2106:     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

2108:     /* handle condensation step of Schur complement (if any) */
2109:     if (mumps->id.size_schur > 0) {
2110:       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2111:         second_solve = PETSC_TRUE;
2112:         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2113:         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e., to solve id.redrhs */
2114:       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2115:     }

2117:     mumps->id.job = JOB_SOLVE;
2118:     PetscMUMPS_c(mumps);
2119:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

2121:     /* handle expansion step of Schur complement (if any) */
2122:     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2123:     else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
2124:       PetscCall(MatMumpsSolveSchur_Private(A));
2125:       for (j = 0; j < nrhs; ++j)
2126:         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2127:     }

2129:     if (!denseB) { /* sparse B, restore ia, ja */
2130:       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2131:       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2132:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2133:     }

2135:     // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2136:     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2137:     PetscCall(MatDenseRestoreArray(X, &array));
2138:     PetscFunctionReturn(PETSC_SUCCESS);
2139:   }

2141:   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2142:   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

2144:   /* create msol_loc to hold mumps local solution */
2145:   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2146:   sol_loc_save          = mumps->id.sol_loc;
2147:   sol_loc_len_save      = mumps->id.sol_loc_len;
2148:   mumps->id.isol_loc    = NULL; // an init state
2149:   mumps->id.sol_loc     = NULL;
2150:   mumps->id.sol_loc_len = 0;

2152:   lsol_loc = mumps->id.lsol_loc;
2153:   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2154:   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2155:   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2156:   mumps->id.isol_loc = isol_loc;

2158:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

2160:   if (denseB) {
2161:     if (mumps->ICNTL20 == 10) {
2162:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2163:       PetscCall(MatDenseGetArrayRead(B, &barray));
2164:       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2165:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2166:       PetscCall(MatGetLocalSize(B, &m, NULL));
2167:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2168:     } else {
2169:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2170:       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created here cannot reduce to an MPI_Gather, resulting in
2171:         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B onto rank 0, then on rank
2172:         0 re-arrange B into the desired order, which is a local operation.
2173:       */

2175:       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2176:       /* wrap dense rhs matrix B into a vector v_mpi */
2177:       PetscCall(MatGetLocalSize(B, &m, NULL));
2178:       PetscCall(MatDenseGetArrayRead(B, &barray));
2179:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2180:       PetscCall(MatDenseRestoreArrayRead(B, &barray));

2182:       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2183:       if (!mumps->myid) {
2184:         PetscInt *idx;
2185:         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2186:         PetscCall(PetscMalloc1(nrhsM, &idx));
2187:         PetscCall(MatGetOwnershipRanges(B, &rstart));
2188:         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2189:           for (j = 0; j < nrhs; j++) {
2190:             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2191:           }
2192:         }

2194:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2195:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2196:         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2197:       } else {
2198:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2199:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2200:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2201:       }

2203:       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2204:       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2205:       PetscCall(ISDestroy(&is_to));
2206:       PetscCall(ISDestroy(&is_from));
2207:       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

2209:       if (!mumps->myid) { /* define rhs on the host */
2210:         PetscCall(VecGetArrayRead(b_seq, &barray));
2211:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2212:         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2213:       }
2214:     }
2215:   } else { /* sparse B */
2216:     b = (Mat_MPIAIJ *)Bt->data;

2218:     /* wrap dense X into a vector v_mpi */
2219:     PetscCall(MatGetLocalSize(X, &m, NULL));
2220:     PetscCall(MatDenseGetArrayRead(X, &barray));
2221:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2222:     PetscCall(MatDenseRestoreArrayRead(X, &barray));

2224:     if (!mumps->myid) {
2225:       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2226:       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2227:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2228:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2229:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2230:     } else {
2231:       mumps->id.irhs_ptr    = NULL;
2232:       mumps->id.irhs_sparse = NULL;
2233:       mumps->id.nz_rhs      = 0;
2234:       if (mumps->id.rhs_sparse_len) {
2235:         PetscCall(PetscFree(mumps->id.rhs_sparse));
2236:         mumps->id.rhs_sparse_len = 0;
2237:       }
2238:     }
2239:   }

2241:   /* solve phase */
2242:   mumps->id.job = JOB_SOLVE;
2243:   PetscMUMPS_c(mumps);
2244:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2246:   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2247:   PetscCall(MatDenseGetArray(X, &array));
2248:   PetscCall(VecPlaceArray(v_mpi, array));

2250:   /* create scatter scat_sol */
2251:   PetscCall(MatGetOwnershipRanges(X, &rstart));
2252:   /* iidx: index for scatter mumps solution to PETSc X */

2254:   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2255:   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2256:   for (i = 0; i < lsol_loc; i++) {
2257:     isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th RHS */

2259:     for (proc = 0; proc < mumps->petsc_size; proc++) {
2260:       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2261:         myrstart = rstart[proc];
2262:         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2263:         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2264:         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2265:         break;
2266:       }
2267:     }

2269:     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2270:   }
2271:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2272:   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2273:   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2274:   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2275:   PetscCall(ISDestroy(&is_from));
2276:   PetscCall(ISDestroy(&is_to));
2277:   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2278:   PetscCall(MatDenseRestoreArray(X, &array));

2280:   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2281:     mumps->id.sol_loc_len = 0;
2282:     PetscCall(PetscFree(mumps->id.sol_loc));
2283:   }

2285:   // restore old values
2286:   mumps->id.sol_loc     = sol_loc_save;
2287:   mumps->id.sol_loc_len = sol_loc_len_save;
2288:   mumps->id.isol_loc    = isol_loc_save;

2290:   PetscCall(PetscFree2(sol_loc, isol_loc));
2291:   PetscCall(PetscFree(idxx));
2292:   PetscCall(VecDestroy(&msol_loc));
2293:   PetscCall(VecDestroy(&v_mpi));
2294:   if (!denseB) {
2295:     if (!mumps->myid) {
2296:       b = (Mat_MPIAIJ *)Bt->data;
2297:       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2298:       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2299:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2300:     }
2301:   } else {
2302:     if (mumps->ICNTL20 == 0) {
2303:       PetscCall(VecDestroy(&b_seq));
2304:       PetscCall(VecScatterDestroy(&scat_rhs));
2305:     }
2306:   }
2307:   PetscCall(VecScatterDestroy(&scat_sol));
2308:   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2309:   PetscFunctionReturn(PETSC_SUCCESS);
2310: }
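
/* Editor's note (usage sketch, not part of the original source): calling the multiple right-hand
   side solve above through the factorization interface directly. A minimal sketch, assuming A is
   an assembled AIJ matrix and B, X are MATDENSE matrices with the same row layout as A:

     Mat           F;
     MatFactorInfo info;
     IS            rperm, cperm;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm)); // placeholder ordering; MUMPS reorders internally
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatMatSolve(F, B, X));                                  // dispatches to MatMatSolve_MUMPS()
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));
*/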

2312: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2313: {
2314:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2315:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2317:   PetscFunctionBegin;
2318:   mumps->id.ICNTL(9) = 0;
2319:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2320:   mumps->id.ICNTL(9) = value;
2321:   PetscFunctionReturn(PETSC_SUCCESS);
2322: }

2324: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2325: {
2326:   PetscBool flg;
2327:   Mat       B;

2329:   PetscFunctionBegin;
2330:   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2331:   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

2333:   /* Create B=Bt^T that uses Bt's data structure */
2334:   PetscCall(MatCreateTranspose(Bt, &B));

2336:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2337:   PetscCall(MatDestroy(&B));
2338:   PetscFunctionReturn(PETSC_SUCCESS);
2339: }
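
/* Editor's note (usage sketch, not part of the original source): with a factor F obtained as in
   the sketch after MatMatSolve_MUMPS() above, a sparse set of right-hand sides is supplied as its
   transpose Bt, an AIJ matrix whose rows are the RHS columns, matching MUMPS' compressed-column
   requirement:

     PetscCall(MatMatTransposeSolve(F, Bt, X)); // X must be MATDENSE; wraps Bt and calls MatMatSolve_MUMPS()
*/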

2341: #if !defined(PETSC_USE_COMPLEX)
2342: /*
2343:   input:
2344:    F:        numeric factor
2345:   output:
2346:    nneg:     total number of negative pivots
2347:    nzero:    total number of zero pivots
2348:    npos:     (global dimension of F) - nneg - nzero
2349: */
2350: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2351: {
2352:   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2353:   PetscMPIInt size;

2355:   PetscFunctionBegin;
2356:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2357:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2358:   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));

2360:   if (nneg) *nneg = mumps->id.INFOG(12);
2361:   if (nzero || npos) {
2362:     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2363:     if (nzero) *nzero = mumps->id.INFOG(28);
2364:     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2365:   }
2366:   PetscFunctionReturn(PETSC_SUCCESS);
2367: }
2368: #endif
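/* Illustrative usage sketch (hypothetical application code): the inertia computed above is queried through
   MatGetInertia() on a Cholesky factor; per the checks above, a parallel run needs -mat_mumps_icntl_13 1,
   and -mat_mumps_icntl_24 1 is required when zero pivots are to be counted.

     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
*/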

2370: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2371: {
2372:   PetscMPIInt    nreqs;
2373:   PetscMUMPSInt *irn, *jcn;
2374:   PetscMPIInt    count;
2375:   PetscCount     totnnz, remain;
2376:   const PetscInt osize = mumps->omp_comm_size;
2377:   PetscScalar   *val;

2379:   PetscFunctionBegin;
2380:   if (osize > 1) {
2381:     if (reuse == MAT_INITIAL_MATRIX) {
2382:       /* master first gathers counts of nonzeros to receive */
2383:       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2384:       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

2386:       /* Then each computes number of send/recvs */
2387:       if (mumps->is_omp_master) {
2388:         /* Start from 1 since self communication is not done in MPI */
2389:         nreqs = 0;
2390:         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2391:       } else {
2392:         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2393:       }
2394:       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

2396:       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
2397:          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
2398:          might be a prime number > 2^31-1, we have to slice the messages. Note that since omp_comm_size
2399:          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2400:        */
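      /* Worked example of the slicing: with PETSC_MPI_INT_MAX = 2^31-1, a rank holding nnz = 5e9 entries
         posts ceil(5e9 / (2^31-1)) = 3 sends per array (irn, jcn and val), i.e. 9 requests in total, while
         a rank with nnz <= 2^31-1 posts just 3. */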
2401:       nreqs = 0; /* counter for actual send/recvs */
2402:       if (mumps->is_omp_master) {
2403:         totnnz = 0;

2405:         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2406:         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2407:         PetscCall(PetscMalloc1(totnnz, &val));

2409:         /* Self communication */
2410:         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2411:         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2412:         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

2414:         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2415:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2416:         PetscCall(PetscFree(mumps->val_alloc));
2417:         mumps->nnz = totnnz;
2418:         mumps->irn = irn;
2419:         mumps->jcn = jcn;
2420:         mumps->val = mumps->val_alloc = val;

2422:         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2423:         jcn += mumps->recvcount[0];
2424:         val += mumps->recvcount[0];

2426:         /* Remote communication */
2427:         for (PetscMPIInt i = 1; i < osize; i++) {
2428:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2429:           remain = mumps->recvcount[i] - count;
2430:           while (count > 0) {
2431:             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2432:             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2433:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2434:             irn += count;
2435:             jcn += count;
2436:             val += count;
2437:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2438:             remain -= count;
2439:           }
2440:         }
2441:       } else {
2442:         irn    = mumps->irn;
2443:         jcn    = mumps->jcn;
2444:         val    = mumps->val;
2445:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2446:         remain = mumps->nnz - count;
2447:         while (count > 0) {
2448:           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2449:           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2450:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2451:           irn += count;
2452:           jcn += count;
2453:           val += count;
2454:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2455:           remain -= count;
2456:         }
2457:       }
2458:     } else {
2459:       nreqs = 0;
2460:       if (mumps->is_omp_master) {
2461:         val = mumps->val + mumps->recvcount[0];
2462:         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2463:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2464:           remain = mumps->recvcount[i] - count;
2465:           while (count > 0) {
2466:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2467:             val += count;
2468:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2469:             remain -= count;
2470:           }
2471:         }
2472:       } else {
2473:         val    = mumps->val;
2474:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2475:         remain = mumps->nnz - count;
2476:         while (count > 0) {
2477:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2478:           val += count;
2479:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2480:           remain -= count;
2481:         }
2482:       }
2483:     }
2484:     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2485:     mumps->tag++; /* It is totally fine for the above send/recvs to share one MPI tag */
2486:   }
2487:   PetscFunctionReturn(PETSC_SUCCESS);
2488: }

2490: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2491: {
2492:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2494:   PetscFunctionBegin;
2495:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
2496:     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2497:     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2498:     PetscFunctionReturn(PETSC_SUCCESS);
2499:   }

2501:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2502:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

2504:   /* numerical factorization phase */
2505:   mumps->id.job = JOB_FACTNUMERIC;
2506:   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2507:     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2508:   } else {
2509:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2510:   }

2512:   if (F->schur) {
2513:     const PetscScalar *array;
2514:     MUMPS_INT          size = mumps->id.size_schur;
2515:     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2516:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2517:     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2518:   }

2520:   PetscMUMPS_c(mumps);
2521:   if (mumps->id.INFOG(1) < 0) {
2522:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2523:     if (mumps->id.INFOG(1) == -10) {
2524:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2525:       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2526:     } else if (mumps->id.INFOG(1) == -13) {
2527:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2528:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2529:     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2530:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2531:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2532:     } else {
2533:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534:       F->factorerrortype = MAT_FACTOR_OTHER;
2535:     }
2536:   }
2537:   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));

2539:   F->assembled = PETSC_TRUE;

2541:   if (F->schur) { /* reset Schur status to unfactored */
2542: #if defined(PETSC_HAVE_CUDA)
2543:     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2544: #endif
2545:     PetscScalar *array;
2546:     PetscCall(MatDenseGetArray(F->schur, &array));
2547:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2548:     PetscCall(MatDenseRestoreArray(F->schur, &array));
2549:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2550:       mumps->id.ICNTL(19) = 2;
2551:       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2552:     }
2553:     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2554:   }

2556:   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
2557:   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

2559:   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2560:   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2561:   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2562:   // which the solution (in SOL_loc) is available on the local processor.
2563:   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2564:   // normally have the same contents for each of these calls. The only exception is the case of
2565:   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2566:   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2567:   if (mumps->petsc_size > 1) {
2568:     PetscInt     lsol_loc;
2569:     PetscScalar *array;

2571:     /* distributed solution; Create x_seq=sol_loc for repeated use */
2572:     if (mumps->x_seq) {
2573:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2574:       PetscCall(PetscFree(mumps->id.isol_loc));
2575:       PetscCall(VecDestroy(&mumps->x_seq));
2576:     }
2577:     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2578:     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2579:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2580:     PetscCall(VecGetArray(mumps->x_seq, &array));
2581:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2582:     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2583:     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2584:   }
2585:   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2586:   PetscFunctionReturn(PETSC_SUCCESS);
2587: }
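/* Illustrative caller-side sketch (hypothetical application code): instead of erroring out, a caller can
   inspect the factorerrortype set above after a failed factorization, e.g.

     MatFactorError err;
     PetscCall(MatFactorGetError(F, &err));
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) { ... shift the matrix and refactor ... }

   MUMPS-specific diagnostics such as INFOG(1)/INFO(2) remain available through MatMumpsGetInfog() and
   MatMumpsGetInfo(). */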

2589: /* Sets MUMPS options from the options database */
2590: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2591: {
2592:   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2593:   PetscReal     cntl;
2594:   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2595:   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2596:   PetscBool     flg   = PETSC_FALSE;
2597:   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2598:   void         *arr;

2600:   PetscFunctionBegin;
2601:   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2602:   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2603:     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2604:     PetscInt       nthreads   = 0;
2605:     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2606:     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2607:     PetscMUMPSInt  nblk, *blkvar, *blkptr;

2609:     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2610:     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2611:     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

2613:     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2614:     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2615:     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2616:     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2617:     if (mumps->use_petsc_omp_support) {
2618:       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2619: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2620:       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2621:       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2622: #else
2623:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2624:               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2625: #endif
2626:     } else {
2627:       mumps->omp_comm      = PETSC_COMM_SELF;
2628:       mumps->mumps_comm    = mumps->petsc_comm;
2629:       mumps->is_omp_master = PETSC_TRUE;
2630:     }
2631:     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2632:     mumps->reqs = NULL;
2633:     mumps->tag  = 0;

2635:     if (mumps->mumps_comm != MPI_COMM_NULL) {
2636:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2637:         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2638:         MPI_Comm comm;
2639:         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2640:         mumps->mumps_comm = comm;
2641:       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2642:     }

2644:     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2645:     mumps->id.job          = JOB_INIT;
2646:   mumps->id.par          = 1; /* host participates in factorization and solve */
2647:     mumps->id.sym          = mumps->sym;

2649:     size          = mumps->id.size_schur;
2650:     arr           = mumps->id.schur;
2651:     listvar_schur = mumps->id.listvar_schur;
2652:     nblk          = mumps->id.nblk;
2653:     blkvar        = mumps->id.blkvar;
2654:     blkptr        = mumps->id.blkptr;
2655:     if (PetscDefined(USE_DEBUG)) {
2656:       for (PetscInt i = 0; i < size; i++)
2657:         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2658:                    A->rmap->N);
2659:     }

2661:     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2662:     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2663:     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed since the requested precision differs from that of PetscScalar");
2664:     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));

2666:     PetscMUMPS_c(mumps);
2667:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2669:     /* set PETSc-MUMPS default options - override MUMPS default */
2670:     mumps->id.ICNTL(3) = 0;
2671:     mumps->id.ICNTL(4) = 0;
2672:     if (mumps->petsc_size == 1) {
2673:       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2674:       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2675:     } else {
2676:       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2677:       mumps->id.ICNTL(21) = 1; /* distributed solution */
2678:     }
2679:     if (nblk && blkptr) {
2680:       mumps->id.ICNTL(15) = 1;
2681:       mumps->id.nblk      = nblk;
2682:       mumps->id.blkvar    = blkvar;
2683:       mumps->id.blkptr    = blkptr;
2684:     } else mumps->id.ICNTL(15) = 0;

2686:     /* restore cached ICNTL and CNTL values */
2687:     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2688:     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);

2690:     PetscCall(PetscFree(mumps->ICNTL_pre));
2691:     PetscCall(PetscFree(mumps->CNTL_pre));

2693:     if (schur) {
2694:       mumps->id.size_schur    = size;
2695:       mumps->id.schur_lld     = size;
2696:       mumps->id.schur         = arr;
2697:       mumps->id.listvar_schur = listvar_schur;
2698:       if (mumps->petsc_size > 1) {
2699:         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

2701:         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2702:         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2703:         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2704:         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2705:       } else {
2706:         if (F->factortype == MAT_FACTOR_LU) {
2707:           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2708:         } else {
2709:           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2710:         }
2711:       }
2712:       mumps->id.ICNTL(26) = -1;
2713:     }

2715:     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2716:        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2717:      */
2718:     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2719:     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));

2721:     mumps->scat_rhs = NULL;
2722:     mumps->scat_sol = NULL;
2723:   }
2724:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2725:   if (flg) mumps->id.ICNTL(1) = icntl;
2726:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2727:   if (flg) mumps->id.ICNTL(2) = icntl;
2728:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2729:   if (flg) mumps->id.ICNTL(3) = icntl;

2731:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2732:   if (flg) mumps->id.ICNTL(4) = icntl;
2733:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

2735:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2736:   if (flg) mumps->id.ICNTL(6) = icntl;

2738:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2739:   if (flg) {
2740:     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2741:     mumps->id.ICNTL(7) = icntl;
2742:   }

2744:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2745:   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2746:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2747:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2748:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2749:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2750:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2751:   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2752:   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2753:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2754:   if (flg) {
2755:     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2756:     else if (mumps->id.ICNTL(15) > 0) {
2757:       const PetscInt *bsizes;
2758:       PetscInt        nblocks, p, *blkptr = NULL;
2759:       PetscMPIInt    *recvcounts, *displs, n;
2760:       PetscMPIInt     rank, size = 0;

2762:       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2763:       flg = PETSC_TRUE;
2764:       for (p = 0; p < nblocks; ++p) {
2765:         if (bsizes[p] > 1) break;
2766:       }
2767:       if (p == nblocks) flg = PETSC_FALSE;
2768:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2769:       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2770:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2771:         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2772:         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2773:         PetscCall(PetscMPIIntCast(nblocks, &n));
2774:         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2775:         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2776:         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2777:         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2778:         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2779:         if (rank == 0) {
2780:           blkptr[0] = 1;
2781:           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2782:           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2783:         }
2784:         PetscCall(PetscFree2(recvcounts, displs));
2785:         PetscCall(PetscFree(blkptr));
2786:       }
2787:     }
2788:   }
2789:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2790:   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
2791:     PetscCall(MatDestroy(&F->schur));
2792:     PetscCall(MatMumpsResetSchur_Private(mumps));
2793:   }

2795:   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2796:      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2797:      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2798:      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2799:      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2800:      In short, we could not use distributed RHS with MPICH until v4.0b1, unless the workaround available in mumps-5.6.2+ is enabled.
2801:    */
2802:   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2803: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2804:   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2805: #endif
2806:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2807:   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2808: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2809:   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2810: #endif
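  /* For example, -mat_mumps_icntl_20 0 forces the centralized-RHS path: the symbolic routines below then
     create b_seq/scat_rhs with VecScatterCreateToZero() and the right-hand side is gathered on rank 0 before
     each solve, while -mat_mumps_icntl_20 10 (the default when supported) keeps the right-hand side distributed. */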
2811:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

2813:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2814:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2815:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2816:   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */

2818:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2819:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is used", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2820:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2821:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2822:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2823:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2824:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2825:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2826:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2827:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2828:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2829:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2830:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2831:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2832:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2833:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

2835:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2836:   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2837:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2838:   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2839:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2840:   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2841:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2842:   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2843:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2844:   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2845:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2846:   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);

2848:   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));

2850:   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2851:   if (ninfo) {
2852:     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2853:     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2854:     mumps->ninfo = ninfo;
2855:     for (i = 0; i < ninfo; i++) {
2856:       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2857:       mumps->info[i] = info[i];
2858:     }
2859:   }
2860:   PetscOptionsEnd();
2861:   PetscFunctionReturn(PETSC_SUCCESS);
2862: }
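/* Illustrative command-line usage of the options parsed above (assuming the default, empty options prefix
   and a KSP/PC wrapping this factor); any subset may be given, and unset values keep the defaults set above:

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 40 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
*/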

2864: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2865: {
2866:   PetscFunctionBegin;
2867:   if (mumps->id.INFOG(1) < 0) {
2868:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2869:     if (mumps->id.INFOG(1) == -6) {
2870:       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2871:       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2872:     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2873:       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2874:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2875:     } else {
2876:       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2877:       F->factorerrortype = MAT_FACTOR_OTHER;
2878:     }
2879:   }
2880:   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2881:   PetscFunctionReturn(PETSC_SUCCESS);
2882: }

2884: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2885: {
2886:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2887:   Vec            b;
2888:   const PetscInt M = A->rmap->N;

2890:   PetscFunctionBegin;
2891:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2892:     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2893:     PetscFunctionReturn(PETSC_SUCCESS);
2894:   }

2896:   /* Set MUMPS options from the options database */
2897:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2899:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2900:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2902:   /* analysis phase */
2903:   mumps->id.job = JOB_FACTSYMBOLIC;
2904:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2905:   switch (mumps->id.ICNTL(18)) {
2906:   case 0: /* centralized assembled matrix input */
2907:     if (!mumps->myid) {
2908:       mumps->id.nnz = mumps->nnz;
2909:       mumps->id.irn = mumps->irn;
2910:       mumps->id.jcn = mumps->jcn;
2911:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2912:       if (r && mumps->id.ICNTL(7) == 7) {
2913:         mumps->id.ICNTL(7) = 1;
2914:         if (!mumps->myid) {
2915:           const PetscInt *idx;
2916:           PetscInt        i;

2918:           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2919:           PetscCall(ISGetIndices(r, &idx));
2920:           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2921:           PetscCall(ISRestoreIndices(r, &idx));
2922:         }
2923:       }
2924:     }
2925:     break;
2926:   case 3: /* distributed assembled matrix input (size>1) */
2927:     mumps->id.nnz_loc = mumps->nnz;
2928:     mumps->id.irn_loc = mumps->irn;
2929:     mumps->id.jcn_loc = mumps->jcn;
2930:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2931:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2932:       PetscCall(MatCreateVecs(A, NULL, &b));
2933:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2934:       PetscCall(VecDestroy(&b));
2935:     }
2936:     break;
2937:   }
2938:   PetscMUMPS_c(mumps);
2939:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

2941:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2942:   F->ops->solve             = MatSolve_MUMPS;
2943:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2944:   F->ops->matsolve          = MatMatSolve_MUMPS;
2945:   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2946:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2948:   mumps->matstruc = SAME_NONZERO_PATTERN;
2949:   PetscFunctionReturn(PETSC_SUCCESS);
2950: }
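/* Note for callers of MatLUFactorSymbolic_AIJMUMPS() above: when a non-NULL row permutation r is supplied
   and ICNTL(7) is left at its automatic value 7, the interface switches to ICNTL(7)=1 and passes r (shifted
   to 1-based indices) to MUMPS as perm_in; otherwise r is ignored. */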

2952: /* Note the PETSc r and c permutations are ignored */
2953: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2954: {
2955:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2956:   Vec            b;
2957:   const PetscInt M = A->rmap->N;

2959:   PetscFunctionBegin;
2960:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2961:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2962:     PetscFunctionReturn(PETSC_SUCCESS);
2963:   }

2965:   /* Set MUMPS options from the options database */
2966:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2968:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2969:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2971:   /* analysis phase */
2972:   mumps->id.job = JOB_FACTSYMBOLIC;
2973:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2974:   switch (mumps->id.ICNTL(18)) {
2975:   case 0: /* centralized assembled matrix input */
2976:     if (!mumps->myid) {
2977:       mumps->id.nnz = mumps->nnz;
2978:       mumps->id.irn = mumps->irn;
2979:       mumps->id.jcn = mumps->jcn;
2980:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2981:     }
2982:     break;
2983:   case 3: /* distributed assembled matrix input (size>1) */
2984:     mumps->id.nnz_loc = mumps->nnz;
2985:     mumps->id.irn_loc = mumps->irn;
2986:     mumps->id.jcn_loc = mumps->jcn;
2987:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2988:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2989:       PetscCall(MatCreateVecs(A, NULL, &b));
2990:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2991:       PetscCall(VecDestroy(&b));
2992:     }
2993:     break;
2994:   }
2995:   PetscMUMPS_c(mumps);
2996:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

2998:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2999:   F->ops->solve             = MatSolve_MUMPS;
3000:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3001:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

3003:   mumps->matstruc = SAME_NONZERO_PATTERN;
3004:   PetscFunctionReturn(PETSC_SUCCESS);
3005: }

3007: /* Note the PETSc r permutation and factor info are ignored */
3008: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3009: {
3010:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3011:   Vec            b;
3012:   const PetscInt M = A->rmap->N;

3014:   PetscFunctionBegin;
3015:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3016:     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3017:     PetscFunctionReturn(PETSC_SUCCESS);
3018:   }

3020:   /* Set MUMPS options from the options database */
3021:   PetscCall(MatSetFromOptions_MUMPS(F, A));

3023:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3024:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

3026:   /* analysis phase */
3027:   mumps->id.job = JOB_FACTSYMBOLIC;
3028:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3029:   switch (mumps->id.ICNTL(18)) {
3030:   case 0: /* centralized assembled matrix input */
3031:     if (!mumps->myid) {
3032:       mumps->id.nnz = mumps->nnz;
3033:       mumps->id.irn = mumps->irn;
3034:       mumps->id.jcn = mumps->jcn;
3035:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3036:     }
3037:     break;
3038:   case 3: /* distributed assembled matrix input (size>1) */
3039:     mumps->id.nnz_loc = mumps->nnz;
3040:     mumps->id.irn_loc = mumps->irn;
3041:     mumps->id.jcn_loc = mumps->jcn;
3042:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3043:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3044:       PetscCall(MatCreateVecs(A, NULL, &b));
3045:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3046:       PetscCall(VecDestroy(&b));
3047:     }
3048:     break;
3049:   }
3050:   PetscMUMPS_c(mumps);
3051:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

3053:   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3054:   F->ops->solve                 = MatSolve_MUMPS;
3055:   F->ops->solvetranspose        = MatSolve_MUMPS;
3056:   F->ops->matsolve              = MatMatSolve_MUMPS;
3057:   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3058:   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3059: #if defined(PETSC_USE_COMPLEX)
3060:   F->ops->getinertia = NULL;
3061: #else
3062:   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3063: #endif

3065:   mumps->matstruc = SAME_NONZERO_PATTERN;
3066:   PetscFunctionReturn(PETSC_SUCCESS);
3067: }

3069: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3070: {
3071:   PetscBool         isascii;
3072:   PetscViewerFormat format;
3073:   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

3075:   PetscFunctionBegin;
3076:   /* check if matrix is mumps type */
3077:   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

3079:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3080:   if (isascii) {
3081:     PetscCall(PetscViewerGetFormat(viewer, &format));
3082:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3083:       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3084:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3085:         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3086:         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3087:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3088:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3089:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3090:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3091:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3092:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3093:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3094:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3095:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3096:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3097:         if (mumps->id.ICNTL(11) > 0) {
3098:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3099:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3100:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3101:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3102:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3103:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3104:         }
3105:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3106:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3107:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3108:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3109:         /* ICNTL(16) and ICNTL(17) not used */
3110:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3111:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3112:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3113:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3114:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3115:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));

3117:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3118:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3119:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3120:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3121:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3122:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

3124:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3125:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors are discarded in the solve phase):   %d\n", mumps->id.ICNTL(31)));
3126:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3127:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3128:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3129:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3130:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3131:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3132:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3133:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

3135:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3136:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3137:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3138:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3139:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3140:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

3142:         /* information local to each processor */
3143:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3144:         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3145:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3146:         PetscCall(PetscViewerFlush(viewer));
3147:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3148:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3149:         PetscCall(PetscViewerFlush(viewer));
3150:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3151:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3152:         PetscCall(PetscViewerFlush(viewer));

3154:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
3155:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3156:         PetscCall(PetscViewerFlush(viewer));

3158:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
3159:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3160:         PetscCall(PetscViewerFlush(viewer));

3162:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3163:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3164:         PetscCall(PetscViewerFlush(viewer));

3166:         if (mumps->ninfo && mumps->ninfo <= 80) {
3167:           PetscInt i;
3168:           for (i = 0; i < mumps->ninfo; i++) {
3169:             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3170:             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3171:             PetscCall(PetscViewerFlush(viewer));
3172:           }
3173:         }
3174:         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3175:       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

3177:       if (mumps->myid == 0) { /* information from the host */
3178:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3179:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3180:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3181:         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

3183:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3184:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3185:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3186:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3187:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3188:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3189:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3190:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3191:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3192:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3193:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3194:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
3195:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3196:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3197:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3198:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3199:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3200:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3201:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3202:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3203:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3204:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3205:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3206:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3207:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3208:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3209:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3210:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3211:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3212:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3213:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3214:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3215:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3216:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3217:       }
3218:     }
3219:   }
3220:   PetscFunctionReturn(PETSC_SUCCESS);
3221: }

3223: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3224: {
3225:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

3227:   PetscFunctionBegin;
3228:   info->block_size        = 1.0;
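  /* Per the MUMPS manual convention, a negative INFOG(20) reports the estimated number of factor entries in millions, hence the rescaling by -1000000 below */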
3229:   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3230:   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3231:   info->nz_unneeded       = 0.0;
3232:   info->assemblies        = 0.0;
3233:   info->mallocs           = 0.0;
3234:   info->memory            = 0.0;
3235:   info->fill_ratio_given  = 0;
3236:   info->fill_ratio_needed = 0;
3237:   info->factor_mallocs    = 0;
3238:   PetscFunctionReturn(PETSC_SUCCESS);
3239: }

3241: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3242: {
3243:   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3244:   const PetscScalar *arr;
3245:   const PetscInt    *idxs;
3246:   PetscInt           size, i;

3248:   PetscFunctionBegin;
3249:   PetscCall(ISGetLocalSize(is, &size));
3250:   /* Schur complement matrix */
3251:   PetscCall(MatDestroy(&F->schur));
3252:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3253:   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3254:   // do not allocate mumps->id.schur[] yet since its precision is not yet known
3255:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3256:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3257:   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3258:   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

3260:   /* MUMPS expects Fortran style indices */
3261:   PetscCall(PetscFree(mumps->id.listvar_schur));
3262:   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3263:   PetscCall(ISGetIndices(is, &idxs));
3264:   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3265:   PetscCall(ISRestoreIndices(is, &idxs));
3266:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3267:   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3268:   else mumps->ICNTL26 = -1;
3269:   PetscFunctionReturn(PETSC_SUCCESS);
3270: }
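
/*
   A minimal usage sketch (not part of this file's implementation): to ask MUMPS for a Schur
   complement, pass an index set of the Schur variables to MatFactorSetSchurIS() on the factor
   matrix before the symbolic factorization. The size nschur and offset first below are
   hypothetical placeholders.

     IS is;
     PetscCall(ISCreateStride(PETSC_COMM_SELF, nschur, first, 1, &is));
     PetscCall(MatFactorSetSchurIS(F, is));  // must precede the symbolic factorization
     // ... factor F, solve, then retrieve the Schur complement, e.g. with MatFactorCreateSchurComplement()
     PetscCall(ISDestroy(&is));
*/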

3272: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3273: {
3274:   Mat          St;
3275:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3276:   PetscScalar *array;
3277:   PetscInt     i, j, N = mumps->id.size_schur;

3279:   PetscFunctionBegin;
3280:   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3281:   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3282:   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3283:   PetscCall(MatSetType(St, MATDENSE));
3284:   PetscCall(MatSetUp(St));
3285:   PetscCall(MatDenseGetArray(St, &array));
3286:   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3287:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3288:       for (i = 0; i < N; i++) {
3289:         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3290:       }
3291:     } else { /* stored by columns */
3292:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3293:     }
3294:   } else {                          /* either full or lower-triangular (not packed) */
3295:     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3296:       for (i = 0; i < N; i++) {
3297:         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3298:       }
3299:     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3300:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3301:     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3302:       for (i = 0; i < N; i++) {
3303:         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3304:       }
3305:     }
3306:   }
3307:   PetscCall(MatDenseRestoreArray(St, &array));
3308:   *S = St;
3309:   PetscFunctionReturn(PETSC_SUCCESS);
3310: }

3312: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3313: {
3314:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3316:   PetscFunctionBegin;
3317:   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3318:     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3319:     for (i = 0; i < nICNTL_pre; ++i)
3320:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3321:     if (i == nICNTL_pre) {                             /* not already cached */
3322:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3323:       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3324:       mumps->ICNTL_pre[0]++;
3325:     }
3326:     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3327:     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3328:   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3329:   PetscFunctionReturn(PETSC_SUCCESS);
3330: }

3332: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3333: {
3334:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3336:   PetscFunctionBegin;
3337:   if (mumps->id.job == JOB_NULL) {
3338:     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3339:     *ival = 0;
3340:     for (i = 0; i < nICNTL_pre; ++i) {
3341:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3342:     }
3343:   } else *ival = mumps->id.ICNTL(icntl);
3344:   PetscFunctionReturn(PETSC_SUCCESS);
3345: }

3347: /*@
3348:   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3350:   Logically Collective

3352:   Input Parameters:
3353: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3354: . icntl - index of MUMPS parameter array `ICNTL()`
3355: - ival  - value of MUMPS `ICNTL(icntl)`

3357:   Options Database Key:
3358: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

3360:   Level: beginner

3362:   Note:
3363:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

3365: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3366: @*/
3367: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3368: {
3369:   PetscFunctionBegin;
3371:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3374:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3375:   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3376:   PetscFunctionReturn(PETSC_SUCCESS);
3377: }
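
/*
   A minimal usage sketch, assuming a KSP/PC solve: create the MUMPS factor matrix through the PC
   so that ICNTL values can be set programmatically before factorization (same effect as the
   -mat_mumps_icntl_<icntl> option). The objects ksp, A, b and x are assumed to exist already.

     PC  pc;
     Mat F;
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc)); // creates F so MUMPS options can be set now
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30));    // ICNTL(14): 30% increase of estimated working space
     PetscCall(KSPSolve(ksp, b, x));
*/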

3379: /*@
3380:   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3382:   Logically Collective

3384:   Input Parameters:
3385: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3386: - icntl - index of MUMPS parameter array ICNTL()

3388:   Output Parameter:
3389: . ival - value of MUMPS ICNTL(icntl)

3391:   Level: beginner

3393: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3394: @*/
3395: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3396: {
3397:   PetscFunctionBegin;
3399:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3401:   PetscAssertPointer(ival, 3);
3402:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3403:   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3404:   PetscFunctionReturn(PETSC_SUCCESS);
3405: }
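
/*
   A short sketch: read back an ICNTL value from the factor matrix F (obtained as in the
   MatMumpsSetIcntl() example above), e.g. to check whether null-pivot detection is enabled.

     PetscInt icntl24;
     PetscCall(MatMumpsGetIcntl(F, 24, &icntl24));
*/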

3407: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3408: {
3409:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3411:   PetscFunctionBegin;
3412:   if (mumps->id.job == JOB_NULL) {
3413:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3414:     for (i = 0; i < nCNTL_pre; ++i)
3415:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3416:     if (i == nCNTL_pre) {
3417:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3418:       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3419:       mumps->CNTL_pre[0]++;
3420:     }
3421:     mumps->CNTL_pre[1 + 2 * i] = icntl;
3422:     mumps->CNTL_pre[2 + 2 * i] = val;
3423:   } else ID_CNTL_SET(mumps->id, icntl, val);
3424:   PetscFunctionReturn(PETSC_SUCCESS);
3425: }

3427: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3428: {
3429:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3431:   PetscFunctionBegin;
3432:   if (mumps->id.job == JOB_NULL) {
3433:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3434:     *val = 0.0;
3435:     for (i = 0; i < nCNTL_pre; ++i) {
3436:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3437:     }
3438:   } else *val = ID_CNTL_GET(mumps->id, icntl);
3439:   PetscFunctionReturn(PETSC_SUCCESS);
3440: }

3442: /*@
3443:   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3445:   Logically Collective

3447:   Input Parameters:
3448: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3449: . icntl - index of MUMPS parameter array `CNTL()`
3450: - val   - value of MUMPS `CNTL(icntl)`

3452:   Options Database Key:
3453: . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`

3455:   Level: beginner

3457:   Note:
3458:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

3460: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3461: @*/
3462: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3463: {
3464:   PetscFunctionBegin;
3466:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3469:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3470:   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3471:   PetscFunctionReturn(PETSC_SUCCESS);
3472: }
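
/*
   A minimal sketch: tighten the MUMPS pivoting thresholds on an existing factor matrix F; the
   numerical values used here are illustrative only.

     PetscCall(MatMumpsSetCntl(F, 1, 0.01));    // CNTL(1): relative pivoting threshold
     PetscCall(MatMumpsSetCntl(F, 3, 1.e-12));  // CNTL(3): absolute pivoting threshold
*/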

3474: /*@
3475:   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3477:   Logically Collective

3479:   Input Parameters:
3480: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3481: - icntl - index of MUMPS parameter array CNTL()

3483:   Output Parameter:
3484: . val - value of MUMPS CNTL(icntl)

3486:   Level: beginner

3488: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3489: @*/
3490: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3491: {
3492:   PetscFunctionBegin;
3494:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3496:   PetscAssertPointer(val, 3);
3497:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3498:   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3499:   PetscFunctionReturn(PETSC_SUCCESS);
3500: }

3502: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3503: {
3504:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3506:   PetscFunctionBegin;
3507:   *info = mumps->id.INFO(icntl);
3508:   PetscFunctionReturn(PETSC_SUCCESS);
3509: }

3511: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3512: {
3513:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3515:   PetscFunctionBegin;
3516:   *infog = mumps->id.INFOG(icntl);
3517:   PetscFunctionReturn(PETSC_SUCCESS);
3518: }

3520: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3521: {
3522:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3524:   PetscFunctionBegin;
3525:   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3526:   PetscFunctionReturn(PETSC_SUCCESS);
3527: }

3529: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3530: {
3531:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3533:   PetscFunctionBegin;
3534:   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3535:   PetscFunctionReturn(PETSC_SUCCESS);
3536: }

3538: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3539: {
3540:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3542:   PetscFunctionBegin;
3543:   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3544:   *size  = 0;
3545:   *array = NULL;
3546:   if (!mumps->myid) {
3547:     *size = mumps->id.INFOG(28);
3548:     PetscCall(PetscMalloc1(*size, array));
3549:     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3550:   }
3551:   PetscFunctionReturn(PETSC_SUCCESS);
3552: }

3554: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3555: {
3556:   Mat          Bt = NULL, Btseq = NULL;
3557:   PetscBool    flg;
3558:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3559:   PetscScalar *aa;
3560:   PetscInt     spnr, *ia, *ja, M, nrhs;

3562:   PetscFunctionBegin;
3563:   PetscAssertPointer(spRHS, 2);
3564:   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3565:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3566:   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3567:   PetscCall(MatTransposeGetMat(spRHS, &Bt));

3569:   PetscCall(MatMumpsSetIcntl(F, 30, 1));

3571:   if (mumps->petsc_size > 1) {
3572:     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3573:     Btseq         = b->A;
3574:   } else {
3575:     Btseq = Bt;
3576:   }

3578:   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3579:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3580:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3581:   mumps->id.rhs = NULL;

3583:   if (!mumps->myid) {
3584:     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3585:     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3586:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3587:     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3588:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3589:   } else {
3590:     mumps->id.irhs_ptr    = NULL;
3591:     mumps->id.irhs_sparse = NULL;
3592:     mumps->id.nz_rhs      = 0;
3593:     if (mumps->id.rhs_sparse_len) {
3594:       PetscCall(PetscFree(mumps->id.rhs_sparse));
3595:       mumps->id.rhs_sparse_len = 0;
3596:     }
3597:   }
3598:   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3599:   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

3601:   /* solve phase */
3602:   mumps->id.job = JOB_SOLVE;
3603:   PetscMUMPS_c(mumps);
3604:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

3606:   if (!mumps->myid) {
3607:     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3608:     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3609:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3610:   }
3611:   PetscFunctionReturn(PETSC_SUCCESS);
3612: }

3614: /*@
3615:   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

3617:   Logically Collective

3619:   Input Parameter:
3620: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3622:   Output Parameter:
3623: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`

3625:   Level: beginner

3627: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3628: @*/
3629: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3630: {
3631:   PetscFunctionBegin;
3633:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3634:   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3635:   PetscFunctionReturn(PETSC_SUCCESS);
3636: }
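
/*
   A minimal sketch (uniprocessor case, hypothetical order n): request the diagonal entries of
   inv(A) from a MUMPS factor F. The nonzero pattern of Bt selects which entries of the inverse
   are computed; Bt is wrapped in a MATTRANSPOSEVIRTUAL matrix as MatMumpsGetInverse() requires.

     Mat Bt, spRHS;
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 1, NULL, &Bt));
     for (PetscInt i = 0; i < n; i++) PetscCall(MatSetValue(Bt, i, i, 1.0, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(Bt, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(Bt, MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreateTranspose(Bt, &spRHS));
     PetscCall(MatMumpsGetInverse(F, spRHS));   // on return Bt holds the requested entries of inv(A)
     PetscCall(MatDestroy(&spRHS));
     PetscCall(MatDestroy(&Bt));
*/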

3638: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3639: {
3640:   Mat spRHS;

3642:   PetscFunctionBegin;
3643:   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3644:   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3645:   PetscCall(MatDestroy(&spRHS));
3646:   PetscFunctionReturn(PETSC_SUCCESS);
3647: }

3649: /*@
3650:   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>

3652:   Logically Collective

3654:   Input Parameter:
3655: . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3657:   Output Parameter:
3658: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T

3660:   Level: beginner

3662: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3663: @*/
3664: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3665: {
3666:   PetscBool flg;

3668:   PetscFunctionBegin;
3670:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3671:   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3672:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix");
3673:   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3674:   PetscFunctionReturn(PETSC_SUCCESS);
3675: }

3677: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3678: {
3679:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3681:   PetscFunctionBegin;
3682:   if (nblk) {
3683:     PetscAssertPointer(blkptr, 4);
3684:     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3685:     PetscCall(PetscFree(mumps->id.blkptr));
3686:     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3687:     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3688:     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3689:     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3690:     // mumps->id.ICNTL(15) = 1;
3691:     if (blkvar) {
3692:       PetscCall(PetscFree(mumps->id.blkvar));
3693:       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3694:       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3695:     }
3696:   } else {
3697:     PetscCall(PetscFree(mumps->id.blkptr));
3698:     PetscCall(PetscFree(mumps->id.blkvar));
3699:     // mumps->id.ICNTL(15) = 0;
3700:     mumps->id.nblk = 0;
3701:   }
3702:   PetscFunctionReturn(PETSC_SUCCESS);
3703: }

3705: /*@
3706:   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`

3708:   Not collective, only relevant on the first process of the MPI communicator

3710:   Input Parameters:
3711: + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3712: . nblk   - the number of blocks
3713: . blkvar - see the MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk=1, nblk`) holds the variables associated with block `iblk`
3714: - blkptr - array of size `nblk + 1`, starting at 1, storing the prefix sums of the block sizes

3716:   Level: advanced

3718: .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3719: @*/
3720: PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3721: {
3722:   PetscFunctionBegin;
3724:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3725:   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3726:   PetscFunctionReturn(PETSC_SUCCESS);
3727: }
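
/*
   A hypothetical sketch: declare three variable blocks of size 2 each (six unknowns in total) on
   the factor matrix F, to be used with -mat_mumps_icntl_15 1. blkptr holds the 1-based prefix sums
   of the block sizes; passing NULL for blkvar assumes the variables appear consecutively block by block.

     PetscInt blkptr[] = {1, 3, 5, 7};
     PetscCall(MatMumpsSetBlk(F, 3, NULL, blkptr));
*/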

3729: /*@
3730:   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

3732:   Logically Collective

3734:   Input Parameters:
3735: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3736: - icntl - index of MUMPS parameter array INFO()

3738:   Output Parameter:
3739: . ival - value of MUMPS INFO(icntl)

3741:   Level: beginner

3743: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3744: @*/
3745: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3746: {
3747:   PetscFunctionBegin;
3749:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3750:   PetscAssertPointer(ival, 3);
3751:   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3752:   PetscFunctionReturn(PETSC_SUCCESS);
3753: }

3755: /*@
3756:   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

3758:   Logically Collective

3760:   Input Parameters:
3761: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3762: - icntl - index of MUMPS parameter array INFOG()

3764:   Output Parameter:
3765: . ival - value of MUMPS INFOG(icntl)

3767:   Level: beginner

3769: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3770: @*/
3771: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3772: {
3773:   PetscFunctionBegin;
3775:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3776:   PetscAssertPointer(ival, 3);
3777:   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3778:   PetscFunctionReturn(PETSC_SUCCESS);
3779: }

3781: /*@
3782:   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

3784:   Logically Collective

3786:   Input Parameters:
3787: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3788: - icntl - index of MUMPS parameter array RINFO()

3790:   Output Parameter:
3791: . val - value of MUMPS RINFO(icntl)

3793:   Level: beginner

3795: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3796: @*/
3797: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3798: {
3799:   PetscFunctionBegin;
3801:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3802:   PetscAssertPointer(val, 3);
3803:   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3804:   PetscFunctionReturn(PETSC_SUCCESS);
3805: }

3807: /*@
3808:   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

3810:   Logically Collective

3812:   Input Parameters:
3813: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3814: - icntl - index of MUMPS parameter array RINFOG()

3816:   Output Parameter:
3817: . val - value of MUMPS RINFOG(icntl)

3819:   Level: beginner

3821: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3822: @*/
3823: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3824: {
3825:   PetscFunctionBegin;
3827:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3828:   PetscAssertPointer(val, 3);
3829:   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3830:   PetscFunctionReturn(PETSC_SUCCESS);
3831: }
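
/*
   A short sketch: retrieve the determinant computed by MUMPS (requested with ICNTL(33), see
   -mat_mumps_icntl_33). As printed by MatView_MUMPS above, det(A) = (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34).

     PetscReal r12, r13;
     PetscInt  e34;
     PetscCall(MatMumpsSetIcntl(F, 33, 1));     // request the determinant before factorization
     // ... factor F ...
     PetscCall(MatMumpsGetRinfog(F, 12, &r12));
     PetscCall(MatMumpsGetRinfog(F, 13, &r13)); // imaginary part, relevant in the complex case
     PetscCall(MatMumpsGetInfog(F, 34, &e34));
*/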

3833: /*@
3834:   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

3836:   Logically Collective

3838:   Input Parameter:
3839: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3841:   Output Parameters:
3842: + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3843: - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3844:           for freeing it.

3846:   Level: beginner

3848: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3849: @*/
3850: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3851: {
3852:   PetscFunctionBegin;
3854:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3855:   PetscAssertPointer(size, 2);
3856:   PetscAssertPointer(array, 3);
3857:   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3858:   PetscFunctionReturn(PETSC_SUCCESS);
3859: }
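
/*
   A minimal sketch: enable null-pivot detection before factorization, then retrieve the 0-based
   rows with null pivots (non-empty on MPI rank 0 only) and free the array afterwards.

     PetscInt  nnull, *rows;
     PetscCall(MatMumpsSetIcntl(F, 24, 1));   // ICNTL(24): detect null pivot rows
     // ... factor F ...
     PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));
     PetscCall(PetscFree(rows));
*/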

3861: /*MC
3862:   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3863:   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

3865:   Works with `MATAIJ` and `MATSBAIJ` matrices

3867:   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

3869:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3870:   See details below.

3872:   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver

3874:   Options Database Keys:
3875: +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3876: .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
3877: .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3878: .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3879: .  -mat_mumps_icntl_6  - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
3880: .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3881:                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3882: .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3883: .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3884: .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3885: .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3886: .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3887: .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3888: .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3889: .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3890: .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3891: .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3892: .  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
3893: .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3894: .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3895: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3896: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3897: .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3898: .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3899: .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3900: .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3901: .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3902: .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3903: .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3904: .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3905: .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3906: .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3907: .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3908: .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3909: .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3910: .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3911: .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3912: .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3913: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3914:                                     The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.

3916:   Level: beginner

3918:   Notes:
3919:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3920:   error if the matrix is Hermitian.

3922:   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3923:   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

3925:   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3926:   the failure with
3927: .vb
3928:           KSPGetPC(ksp,&pc);
3929:           PCFactorGetMatrix(pc,&mat);
3930:           MatMumpsGetInfo(mat,....);
3931:           MatMumpsGetInfog(mat,....); etc.
3932: .ve
3933:   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

3935:   MUMPS provides 64-bit integer support in two build modes:
3936:   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3937:   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

3939:   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3940:   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3941:   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3942:   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

3944:   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

3946:   Two modes to run MUMPS/PETSc with OpenMP
3947: .vb
3948:    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3949:    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3950: .ve

3952: .vb
3953:    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3954:    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3955: .ve

3957:    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3958:    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3959:    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3960:    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3961:    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

3963:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3964:    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3965:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3966:    are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We free the CPUs assigned to slaves (which might have been
3967:    set by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs.
3968:    On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the above example, suppose your compute node has two sockets.
3969:    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
3970:    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half
3971:    the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
3972:    problem will not occur. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3973:    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3974:    examine the mapping result.

3976:    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3977:    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3978:    calls `omp_set_num_threads`(m) internally before calling MUMPS.

3980:    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

3982: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3983: M*/

3985: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3986: {
3987:   PetscFunctionBegin;
3988:   *type = MATSOLVERMUMPS;
3989:   PetscFunctionReturn(PETSC_SUCCESS);
3990: }

3992: /* MatGetFactor for Seq and MPI AIJ matrices */
3993: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3994: {
3995:   Mat         B;
3996:   Mat_MUMPS  *mumps;
3997:   PetscBool   isSeqAIJ, isDiag, isDense;
3998:   PetscMPIInt size;

4000:   PetscFunctionBegin;
4001: #if defined(PETSC_USE_COMPLEX)
4002:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4003:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4004:     *F = NULL;
4005:     PetscFunctionReturn(PETSC_SUCCESS);
4006:   }
4007: #endif
4008:   /* Create the factorization matrix */
4009:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4010:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4011:   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4012:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4013:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4014:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4015:   PetscCall(MatSetUp(B));

4017:   PetscCall(PetscNew(&mumps));

4019:   B->ops->view    = MatView_MUMPS;
4020:   B->ops->getinfo = MatGetInfo_MUMPS;

4022:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4023:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4024:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4025:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4026:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4027:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4028:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4029:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4030:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4031:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4032:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4033:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4034:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4035:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4036:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4038:   if (ftype == MAT_FACTOR_LU) {
4039:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4040:     B->factortype            = MAT_FACTOR_LU;
4041:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4042:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4043:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4044:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4045:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4046:     mumps->sym = 0;
4047:   } else {
4048:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4049:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4050:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4051:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4052:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4053:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4054:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4055: #if defined(PETSC_USE_COMPLEX)
4056:     mumps->sym = 2;
4057: #else
4058:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4059:     else mumps->sym = 2;
4060: #endif
4061:   }

4063:   /* set solvertype */
4064:   PetscCall(PetscFree(B->solvertype));
4065:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4066:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4067:   if (size == 1) {
4068:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4069:     B->canuseordering = PETSC_TRUE;
4070:   }
4071:   B->ops->destroy = MatDestroy_MUMPS;
4072:   B->data         = (void *)mumps;

4074:   *F               = B;
4075:   mumps->id.job    = JOB_NULL;
4076:   mumps->ICNTL_pre = NULL;
4077:   mumps->CNTL_pre  = NULL;
4078:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4079:   PetscFunctionReturn(PETSC_SUCCESS);
4080: }

4082: /* MatGetFactor for Seq and MPI SBAIJ matrices */
4083: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4084: {
4085:   Mat         B;
4086:   Mat_MUMPS  *mumps;
4087:   PetscBool   isSeqSBAIJ;
4088:   PetscMPIInt size;

4090:   PetscFunctionBegin;
4091: #if defined(PETSC_USE_COMPLEX)
4092:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4093:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4094:     *F = NULL;
4095:     PetscFunctionReturn(PETSC_SUCCESS);
4096:   }
4097: #endif
4098:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4099:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4100:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4101:   PetscCall(MatSetUp(B));

4103:   PetscCall(PetscNew(&mumps));
4104:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4105:   if (isSeqSBAIJ) {
4106:     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4107:   } else {
4108:     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4109:   }

4111:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4112:   B->ops->view                   = MatView_MUMPS;
4113:   B->ops->getinfo                = MatGetInfo_MUMPS;

4115:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4116:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4117:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4118:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4119:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4120:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4121:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4122:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4123:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4124:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4125:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4126:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4127:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4128:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4129:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4131:   B->factortype = MAT_FACTOR_CHOLESKY;
4132: #if defined(PETSC_USE_COMPLEX)
4133:   mumps->sym = 2;
4134: #else
4135:   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4136:   else mumps->sym = 2;
4137: #endif

4139:   /* set solvertype */
4140:   PetscCall(PetscFree(B->solvertype));
4141:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4142:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4143:   if (size == 1) {
4144:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4145:     B->canuseordering = PETSC_TRUE;
4146:   }
4147:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4148:   B->ops->destroy = MatDestroy_MUMPS;
4149:   B->data         = (void *)mumps;

4151:   *F               = B;
4152:   mumps->id.job    = JOB_NULL;
4153:   mumps->ICNTL_pre = NULL;
4154:   mumps->CNTL_pre  = NULL;
4155:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4156:   PetscFunctionReturn(PETSC_SUCCESS);
4157: }

4159: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4160: {
4161:   Mat         B;
4162:   Mat_MUMPS  *mumps;
4163:   PetscBool   isSeqBAIJ;
4164:   PetscMPIInt size;

4166:   PetscFunctionBegin;
4167:   /* Create the factorization matrix */
4168:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4169:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4170:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4171:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4172:   PetscCall(MatSetUp(B));

4174:   PetscCall(PetscNew(&mumps));
4175:   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4176:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4177:   B->factortype            = MAT_FACTOR_LU;
4178:   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4179:   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4180:   mumps->sym = 0;
4181:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4183:   B->ops->view    = MatView_MUMPS;
4184:   B->ops->getinfo = MatGetInfo_MUMPS;

4186:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4187:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4188:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4189:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4190:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4191:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4192:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4193:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4194:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4195:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4196:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4197:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4198:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4199:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4200:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4202:   /* set solvertype */
4203:   PetscCall(PetscFree(B->solvertype));
4204:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4205:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4206:   if (size == 1) {
4207:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4208:     B->canuseordering = PETSC_TRUE;
4209:   }
4210:   B->ops->destroy = MatDestroy_MUMPS;
4211:   B->data         = (void *)mumps;

4213:   *F               = B;
4214:   mumps->id.job    = JOB_NULL;
4215:   mumps->ICNTL_pre = NULL;
4216:   mumps->CNTL_pre  = NULL;
4217:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4218:   PetscFunctionReturn(PETSC_SUCCESS);
4219: }
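
/*
   A hedged LU sketch for the BAIJ factory above (illustrative only; A, b, x and error handling are
   assumed to exist in the caller). Cholesky with BAIJ is rejected above, so MAT_FACTOR_LU is the
   only choice here; MatMumpsSetIcntl() is shown as an optional tuning step, e.g. ICNTL(14) raises
   the estimated working-space percentage.

     Mat           F;
     IS            rperm, cperm;
     MatFactorInfo info;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30));
     PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));
*/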

4221: /* MatGetFactor for SELL matrices; only sequential SELL is currently supported, see the checks below */
4222: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4223: {
4224:   Mat         B;
4225:   Mat_MUMPS  *mumps;
4226:   PetscBool   isSeqSELL;
4227:   PetscMPIInt size;

4229:   PetscFunctionBegin;
4230:   /* Create the factorization matrix */
4231:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4232:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4233:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4234:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4235:   PetscCall(MatSetUp(B));

4237:   PetscCall(PetscNew(&mumps));

4239:   B->ops->view    = MatView_MUMPS;
4240:   B->ops->getinfo = MatGetInfo_MUMPS;

4242:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4243:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4244:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4245:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4246:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4247:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4248:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4249:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4250:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4251:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4252:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4253:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

4255:   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS Cholesky factorization is not implemented for SELL matrices, use MAT_FACTOR_LU or convert the matrix to AIJ/SBAIJ");
4256:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4257:   B->factortype            = MAT_FACTOR_LU;
4258:   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS factorization is only implemented for sequential SELL matrices (MATSEQSELL), not for MATMPISELL");
4259:   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4260:   mumps->sym              = 0;
4261:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4263:   /* set solvertype */
4264:   PetscCall(PetscFree(B->solvertype));
4265:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4266:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4267:   if (size == 1) {
4268:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4269:     B->canuseordering = PETSC_TRUE;
4270:   }
4271:   B->ops->destroy = MatDestroy_MUMPS;
4272:   B->data         = (void *)mumps;

4274:   *F               = B;
4275:   mumps->id.job    = JOB_NULL;
4276:   mumps->ICNTL_pre = NULL;
4277:   mumps->CNTL_pre  = NULL;
4278:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4279:   PetscFunctionReturn(PETSC_SUCCESS);
4280: }
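
/*
   Only sequential SELL matrices reach the LU path above. One possible workaround for a parallel
   SELL matrix (illustrative sketch; A is assumed to be an assembled MATMPISELL matrix) is to
   convert it and factor the copy:

     Mat Aaij, F;

     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &Aaij));
     PetscCall(MatGetFactor(Aaij, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));

   after which the factorization and solve proceed exactly as for any AIJ matrix.
*/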

4282: /* MatGetFactor for MATNEST matrices */
4283: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4284: {
4285:   Mat         B, **mats;
4286:   Mat_MUMPS  *mumps;
4287:   PetscInt    nr, nc;
4288:   PetscMPIInt size;
4289:   PetscBool   flg = PETSC_TRUE;

4291:   PetscFunctionBegin;
4292: #if defined(PETSC_USE_COMPLEX)
4293:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4294:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4295:     *F = NULL;
4296:     PetscFunctionReturn(PETSC_SUCCESS);
4297:   }
4298: #endif

4300:   /* Return a NULL factor when the nest layout or the block types are not supported */
4301:   *F = NULL;
4302:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4303:   if (ftype == MAT_FACTOR_CHOLESKY) {
4304:     IS       *rows, *cols;
4305:     PetscInt *m, *M;

4307:     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4308:     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4309:     PetscCall(MatNestGetISs(A, rows, cols));
4310:     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4311:     if (!flg) {
4312:       PetscCall(PetscFree2(rows, cols));
4313:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4314:       PetscFunctionReturn(PETSC_SUCCESS);
4315:     }
4316:     PetscCall(PetscMalloc2(nr, &m, nr, &M));
4317:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
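          /* Two ranges [m[r], M[r]] and [m[k], M[k]] intersect exactly when the start of one lies within
             the other; any such overlap between the row maps of two blocks rules out the Cholesky path */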
4318:     for (PetscInt r = 0; flg && r < nr; r++)
4319:       for (PetscInt k = r + 1; flg && k < nr; k++)
4320:         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[r] <= m[k] && m[k] <= M[r])) flg = PETSC_FALSE;
4321:     PetscCall(PetscFree2(m, M));
4322:     PetscCall(PetscFree2(rows, cols));
4323:     if (!flg) {
4324:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4325:       PetscFunctionReturn(PETSC_SUCCESS);
4326:     }
4327:   }

4329:   for (PetscInt r = 0; r < nr; r++) {
4330:     for (PetscInt c = 0; c < nc; c++) {
4331:       Mat       sub = mats[r][c];
4332:       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

4334:       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4335:       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4336:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4337:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4338:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4339:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4340:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4341:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4342:       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4343:       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4344:       if (ftype == MAT_FACTOR_CHOLESKY) {
4345:         if (r == c) {
4346:           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4347:             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4348:             flg = PETSC_FALSE;
4349:           }
4350:         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4351:           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4352:           flg = PETSC_FALSE;
4353:         }
4354:       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4355:         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4356:         flg = PETSC_FALSE;
4357:       }
4358:     }
4359:   }
4360:   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

4362:   /* Create the factorization matrix */
4363:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4364:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4365:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4366:   PetscCall(MatSetUp(B));

4368:   PetscCall(PetscNew(&mumps));

4370:   B->ops->view    = MatView_MUMPS;
4371:   B->ops->getinfo = MatGetInfo_MUMPS;

4373:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4374:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4375:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4376:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4377:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4378:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4379:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4380:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4381:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4382:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4383:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4384:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4385:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4386:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4387:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4389:   if (ftype == MAT_FACTOR_LU) {
4390:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4391:     B->factortype            = MAT_FACTOR_LU;
4392:     mumps->sym               = 0;
4393:   } else {
4394:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4395:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4396: #if defined(PETSC_USE_COMPLEX)
4397:     mumps->sym = 2;
4398: #else
4399:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4400:     else mumps->sym = 2;
4401: #endif
4402:   }
4403:   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4404:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

4406:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4407:   if (size == 1) {
4408:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4409:     B->canuseordering = PETSC_TRUE;
4410:   }

4412:   /* set solvertype */
4413:   PetscCall(PetscFree(B->solvertype));
4414:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4415:   B->ops->destroy = MatDestroy_MUMPS;
4416:   B->data         = (void *)mumps;

4418:   *F               = B;
4419:   mumps->id.job    = JOB_NULL;
4420:   mumps->ICNTL_pre = NULL;
4421:   mumps->CNTL_pre  = NULL;
4422:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4423:   PetscFunctionReturn(PETSC_SUCCESS);
4424: }
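
/*
   A usage sketch for the nest path above (illustrative only; the blocks A00, A01, A10, A11, the
   vectors b and x, and error handling are assumed to exist). The MATNEST operator is handed to
   MUMPS directly, so no user-side conversion to AIJ is required:

     Mat mats[4] = {A00, A01, A10, A11}, A;
     KSP ksp;
     PC  pc;

     PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, mats, &A));
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(KSPSolve(ksp, b, x));
*/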

4426: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4427: {
4428:   PetscFunctionBegin;
4429:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4430:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4431:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4432:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4433:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4434:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4435:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4436:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4437:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4438:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4439:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4440:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4441:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4442:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4443:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4444:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4445:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4446:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4447:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4448:   PetscFunctionReturn(PETSC_SUCCESS);
4449: }
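
/*
   With the registrations above in place, the usual way to reach MUMPS is through the options
   database, e.g. (illustrative command line; any program that sets up a KSP from options will do):

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2

   or, equivalently in code, by calling PCFactorSetMatSolverType(pc, MATSOLVERMUMPS) before the
   preconditioner is set up; the Cholesky registrations are reached the same way with -pc_type cholesky.
*/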