Actual source code: mumps.c

  1: /*
  2:     Provides an interface to the MUMPS sparse solver
  3: */
  4: #include <petscpkg_version.h>
  5: #include <petscsf.h>
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
  8: #include <../src/mat/impls/sell/mpi/mpisell.h>
  9: #include <petsc/private/vecimpl.h>

 11: #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

 13: EXTERN_C_BEGIN
 14: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
 15:   #include <cmumps_c.h>
 16:   #include <zmumps_c.h>
 17:   #include <smumps_c.h>
 18:   #include <dmumps_c.h>
 19: #else
 20:   #if defined(PETSC_USE_COMPLEX)
 21:     #if defined(PETSC_USE_REAL_SINGLE)
 22:       #include <cmumps_c.h>
 23:       #define MUMPS_c     cmumps_c
 24:       #define MumpsScalar CMUMPS_COMPLEX
 25:     #else
 26:       #include <zmumps_c.h>
 27:       #define MUMPS_c     zmumps_c
 28:       #define MumpsScalar ZMUMPS_COMPLEX
 29:     #endif
 30:   #else
 31:     #if defined(PETSC_USE_REAL_SINGLE)
 32:       #include <smumps_c.h>
 33:       #define MUMPS_c     smumps_c
 34:       #define MumpsScalar SMUMPS_REAL
 35:     #else
 36:       #include <dmumps_c.h>
 37:       #define MUMPS_c     dmumps_c
 38:       #define MumpsScalar DMUMPS_REAL
 39:     #endif
 40:   #endif
 41: #endif
 42: #if defined(PETSC_USE_COMPLEX)
 43:   #if defined(PETSC_USE_REAL_SINGLE)
 44:     #define MUMPS_STRUC_C CMUMPS_STRUC_C
 45:   #else
 46:     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
 47:   #endif
 48: #else
 49:   #if defined(PETSC_USE_REAL_SINGLE)
 50:     #define MUMPS_STRUC_C SMUMPS_STRUC_C
 51:   #else
 52:     #define MUMPS_STRUC_C DMUMPS_STRUC_C
 53:   #endif
 54: #endif
 55: EXTERN_C_END

 57: #define JOB_INIT         -1
 58: #define JOB_NULL         0
 59: #define JOB_FACTSYMBOLIC 1
 60: #define JOB_FACTNUMERIC  2
 61: #define JOB_SOLVE        3
 62: #define JOB_END          -2

 64: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
 65:    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
 66:    naming convention in PetscMPIInt, PetscBLASInt etc.
 67: */
 68: typedef MUMPS_INT PetscMUMPSInt;

 70: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
 71:   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
 72:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 73:   #endif
 74: #else
 75:   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used when building MUMPS in full 64-bit mode */
 76:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
 77:   #endif
 78: #endif

 80: #define MPIU_MUMPSINT       MPI_INT
 81: #define PETSC_MUMPS_INT_MAX 2147483647
 82: #define PETSC_MUMPS_INT_MIN -2147483648

 84: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
 85: static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
 86: {
 87:   PetscFunctionBegin;
 88: #if PetscDefined(USE_64BIT_INDICES)
 89:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
 90: #endif
 91:   *b = (PetscMUMPSInt)a;
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }
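
// A minimal usage sketch (illustrative, not part of the build): callers funnel PETSc-sized integers
// through this helper; with 64-bit indices the PetscAssert() above raises PETSC_ERR_ARG_OUTOFRANGE
// instead of silently truncating.
//
//   PetscMUMPSInt m;
//   PetscCall(PetscMUMPSIntCast(A->rmap->N, &m)); // A->rmap->N is the (possibly 64-bit) global row count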

 95: /* Put these utility routines here since they are only used in this file */
 96: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
 97: {
 98:   PetscInt  myval;
 99:   PetscBool myset;

101:   PetscFunctionBegin;
 102:   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
103:   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104:   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105:   if (set) *set = myset;
106:   PetscFunctionReturn(PETSC_SUCCESS);
107: }
108: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
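
// Illustrative sketch (option name and help text are examples only) of how the wrapper is meant to be
// used inside a PetscOptionsBegin()/PetscOptionsEnd() block, here with ICNTL(4), the MUMPS print level:
//
//   PetscMUMPSInt icntl;
//   PetscBool     flg;
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
//   if (flg) mumps->id.ICNTL(4) = icntl;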

110: // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111: //
112: // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
 113: // Before/after calling MUMPS, we need to copy fields in/out between the outer and the inner, which seems expensive. But note that the large fixed-size
 114: // arrays within the types are directly linked. In the end, we only need to copy ~20 integers/pointers, which is cheap. See PreMumpsCall()/PostMumpsCall().
115: //
116: // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
 117: // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118: // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
119: typedef struct {
120:   PetscPrecision precision;   // precision used by MUMPS
121:   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

123:   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124:   MUMPS_INT  sym, par, job;
125:   MUMPS_INT  comm_fortran; /* Fortran communicator */
126:   MUMPS_INT *icntl;
127:   void      *cntl; // MumpsReal, fixed size array
128:   MUMPS_INT  n;
129:   MUMPS_INT  nblk;

131:   /* Assembled entry */
132:   MUMPS_INT8 nnz;
133:   MUMPS_INT *irn;
134:   MUMPS_INT *jcn;
135:   void      *a; // MumpsScalar, centralized input
136:   PetscCount a_len;

138:   /* Distributed entry */
139:   MUMPS_INT8 nnz_loc;
140:   MUMPS_INT *irn_loc;
141:   MUMPS_INT *jcn_loc;
142:   void      *a_loc; // MumpsScalar, distributed input
143:   PetscCount a_loc_len;

145:   /* Matrix by blocks */
146:   MUMPS_INT *blkptr;
147:   MUMPS_INT *blkvar;

149:   /* Ordering, if given by user */
150:   MUMPS_INT *perm_in;

 152:   /* RHS, solution, output data and statistics */
153:   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
154:   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

156:   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157:   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158:   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159:   MUMPS_INT  schur_lld;
160:   MUMPS_INT *info, *infog;   // fixed size array
161:   void      *rinfo, *rinfog; // MumpsReal, fixed size array

163:   /* Null space */
164:   MUMPS_INT *pivnul_list; // allocated by MUMPS!
165:   MUMPS_INT *mapping;     // allocated by MUMPS!

167:   /* Schur */
168:   MUMPS_INT  size_schur;
169:   MUMPS_INT *listvar_schur;
170:   void      *schur; // MumpsScalar
171:   PetscCount schur_len;

173:   /* For out-of-core */
174:   char *ooc_tmpdir; // fixed size array
175:   char *ooc_prefix; // fixed size array
176: } XMUMPS_STRUC_C;

 178: // Note: the fixed-size arrays live inside the MUMPS (inner) struct; the outer struct simply points at them
179: #define AllocatInternalID(MUMPS_STRUC_T, outer) \
180:   do { \
181:     MUMPS_STRUC_T *inner; \
182:     PetscCall(PetscNew(&inner)); \
183:     outer->icntl      = inner->icntl; \
184:     outer->cntl       = inner->cntl; \
185:     outer->info       = inner->info; \
186:     outer->infog      = inner->infog; \
187:     outer->rinfo      = inner->rinfo; \
188:     outer->rinfog     = inner->rinfog; \
189:     outer->ooc_tmpdir = inner->ooc_tmpdir; \
190:     outer->ooc_prefix = inner->ooc_prefix; \
 191:     /* these three fields should never change after init */ \
192:     inner->comm_fortran = outer->comm_fortran; \
193:     inner->par          = outer->par; \
194:     inner->sym          = outer->sym; \
195:     outer->internal_id  = inner; \
196:   } while (0)

 198: // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199: static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200: {
201:   PetscFunctionBegin;
202:   outer->precision = precision;
203: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204:   #if defined(PETSC_USE_COMPLEX)
205:   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
206:   else AllocatInternalID(ZMUMPS_STRUC_C, outer);
207:   #else
208:   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
209:   else AllocatInternalID(DMUMPS_STRUC_C, outer);
210:   #endif
211: #else
212:   AllocatInternalID(MUMPS_STRUC_C, outer);
213: #endif
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }
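
// A rough initialization sketch (illustrative and simplified from the actual setup later in this file;
// the use of MPI_Comm_c2f() and par = 1 here are assumptions): comm_fortran/par/sym must be set on the
// outer id first, because AllocatInternalID() copies them into the inner id only once, at creation time.
//
//   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
//   mumps->id.par          = 1; // host also participates in the computation
//   mumps->id.sym          = mumps->sym;
//   PetscCall(MatMumpsAllocateInternalID(&mumps->id, PETSC_PRECISION_DOUBLE));
//   mumps->id.job = JOB_INIT;
//   PetscCall(PetscCallMumps_Private(&mumps->id)); // defined further below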

217: #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218:   do { \
219:     MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220:     PetscCall(PetscFree(inner->a)); \
221:     PetscCall(PetscFree(inner->a_loc)); \
222:     PetscCall(PetscFree(inner->redrhs)); \
223:     PetscCall(PetscFree(inner->rhs)); \
224:     PetscCall(PetscFree(inner->rhs_sparse)); \
225:     PetscCall(PetscFree(inner->rhs_loc)); \
226:     PetscCall(PetscFree(inner->sol_loc)); \
227:     PetscCall(PetscFree(inner->schur)); \
228:   } while (0)

230: static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231: {
232:   PetscFunctionBegin;
233:   if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
234: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235:     const PetscPrecision mumps_precision = outer->precision;
236:     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237:   #if defined(PETSC_USE_COMPLEX)
238:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239:       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240:   #else
241:       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242:       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243:   #endif
244:     }
245: #endif
246:     PetscCall(PetscFree(outer->internal_id));
247:   }
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

 251: // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
 252: // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
 253: // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they appear as <*ma> and <*m>)
 254: // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255: //
256: // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
 257: // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
 258: // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259: //
260: //
261: //   Input parameters:
262: // + convert   - whether to do PetscScalar to MumpsScalar conversion
263: // . n         - length of the PetscScalar array
 264: // . pa        - [n], points to the PetscScalar array
265: // . precision - precision of MumpsScalar
 266: // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267: // - ma        - on input, an existing MumpsScalar array if any.
268: //
269: //   Output parameters:
 270: // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
 271: // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272: //
273: //   Note:
274: //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275: static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276: {
277:   PetscFunctionBegin;
278: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279:   const PetscPrecision mumps_precision = precision;
 280:   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
281:   #if defined(PETSC_USE_COMPLEX)
282:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284:       if (*m < n) {
285:         PetscCall(PetscFree(*ma));
286:         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287:         *m = n;
288:       }
289:       if (convert) {
290:         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291:         for (PetscCount i = 0; i < n; i++) {
292:           b[i].r = PetscRealPart(pa[i]);
293:           b[i].i = PetscImaginaryPart(pa[i]);
294:         }
295:       }
296:     } else {
297:       if (*m < n) {
298:         PetscCall(PetscFree(*ma));
299:         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300:         *m = n;
301:       }
302:       if (convert) {
303:         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304:         for (PetscCount i = 0; i < n; i++) {
305:           b[i].r = PetscRealPart(pa[i]);
306:           b[i].i = PetscImaginaryPart(pa[i]);
307:         }
308:       }
309:     }
310:   }
311:   #else
312:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314:       if (*m < n) {
315:         PetscCall(PetscFree(*ma));
316:         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317:         *m = n;
318:       }
319:       if (convert) {
320:         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322:       }
323:     } else {
324:       if (*m < n) {
325:         PetscCall(PetscFree(*ma));
326:         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327:         *m = n;
328:       }
329:       if (convert) {
330:         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331:         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332:       }
333:     }
334:   }
335:   #endif
336:   else
337: #endif
338:   {
339:     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340:     *ma = (void *)pa;                       // same precision, make them alias
341:     *m  = 0;
342:   }
343:   PetscFunctionReturn(PETSC_SUCCESS);
344: }
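
// Usage sketch (illustrative): make mumps->id.rhs a MumpsScalar view of a PetscScalar right-hand side
// barray of length n. When the precisions match, id.rhs simply aliases barray and id.rhs_len stays 0;
// otherwise a converted copy is (re)allocated and its length is tracked in id.rhs_len.
//
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));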

 346: // Cast a MumpsScalar array <ma> in <mumps_precision> to a PetscScalar array at address <pa>.
347: //
348: // 1) If the two types are different, cast array elements.
349: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350: static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351: {
352:   PetscFunctionBegin;
353: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
355:   #if defined(PETSC_USE_COMPLEX)
356:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
357:       PetscReal         *a = (PetscReal *)pa;
358:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360:     } else {
361:       PetscReal         *a = (PetscReal *)pa;
362:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363:       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364:     }
365:   #else
366:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
367:       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369:     } else {
370:       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371:       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372:     }
373:   #endif
374:   } else
375: #endif
376:     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

 380: // Cast a PetscScalar array <pa> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381: //
382: // 1) If the two types are different, cast array elements.
383: // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384: static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385: {
386:   PetscFunctionBegin;
387: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388:   if (mumps_precision != PETSC_SCALAR_PRECISION) {
389:   #if defined(PETSC_USE_COMPLEX)
390:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
391:       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392:       for (PetscCount i = 0; i < n; i++) {
393:         b[i].r = PetscRealPart(pa[i]);
394:         b[i].i = PetscImaginaryPart(pa[i]);
395:       }
396:     } else {
397:       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398:       for (PetscCount i = 0; i < n; i++) {
399:         b[i].r = PetscRealPart(pa[i]);
400:         b[i].i = PetscImaginaryPart(pa[i]);
401:       }
402:     }
403:   #else
404:     if (mumps_precision == PETSC_PRECISION_SINGLE) {
405:       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407:     } else {
408:       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409:       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410:     }
411:   #endif
412:   } else
413: #endif
414:     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415:   PetscFunctionReturn(PETSC_SUCCESS);
416: }

418: static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419: {
420:   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421: }

423: #define PreMumpsCall(inner, outer, mumpsscalar) \
424:   do { \
425:     inner->job           = outer->job; \
426:     inner->n             = outer->n; \
427:     inner->nblk          = outer->nblk; \
428:     inner->nnz           = outer->nnz; \
429:     inner->irn           = outer->irn; \
430:     inner->jcn           = outer->jcn; \
431:     inner->a             = (mumpsscalar *)outer->a; \
432:     inner->nnz_loc       = outer->nnz_loc; \
433:     inner->irn_loc       = outer->irn_loc; \
434:     inner->jcn_loc       = outer->jcn_loc; \
435:     inner->a_loc         = (mumpsscalar *)outer->a_loc; \
436:     inner->blkptr        = outer->blkptr; \
437:     inner->blkvar        = outer->blkvar; \
438:     inner->perm_in       = outer->perm_in; \
439:     inner->rhs           = (mumpsscalar *)outer->rhs; \
440:     inner->redrhs        = (mumpsscalar *)outer->redrhs; \
441:     inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
442:     inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
443:     inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
444:     inner->irhs_sparse   = outer->irhs_sparse; \
445:     inner->irhs_ptr      = outer->irhs_ptr; \
446:     inner->isol_loc      = outer->isol_loc; \
447:     inner->irhs_loc      = outer->irhs_loc; \
448:     inner->nrhs          = outer->nrhs; \
449:     inner->lrhs          = outer->lrhs; \
450:     inner->lredrhs       = outer->lredrhs; \
451:     inner->nz_rhs        = outer->nz_rhs; \
452:     inner->lsol_loc      = outer->lsol_loc; \
453:     inner->nloc_rhs      = outer->nloc_rhs; \
454:     inner->lrhs_loc      = outer->lrhs_loc; \
455:     inner->schur_lld     = outer->schur_lld; \
456:     inner->size_schur    = outer->size_schur; \
457:     inner->listvar_schur = outer->listvar_schur; \
458:     inner->schur         = (mumpsscalar *)outer->schur; \
459:   } while (0)

461: #define PostMumpsCall(inner, outer) \
462:   do { \
463:     outer->pivnul_list = inner->pivnul_list; \
464:     outer->mapping     = inner->mapping; \
465:   } while (0)

 467: // Entry point for PETSc to call MUMPS
468: static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469: {
470:   PetscFunctionBegin;
471: #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472:   #if defined(PETSC_USE_COMPLEX)
473:   if (outer->precision == PETSC_PRECISION_SINGLE) {
474:     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475:     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476:     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
477:     PostMumpsCall(inner, outer);
478:   } else {
479:     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480:     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481:     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
482:     PostMumpsCall(inner, outer);
483:   }
484:   #else
485:   if (outer->precision == PETSC_PRECISION_SINGLE) {
486:     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487:     PreMumpsCall(inner, outer, SMUMPS_REAL);
488:     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
489:     PostMumpsCall(inner, outer);
490:   } else {
491:     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492:     PreMumpsCall(inner, outer, DMUMPS_REAL);
493:     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
494:     PostMumpsCall(inner, outer);
495:   }
496:   #endif
497: #else
498:   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499:   PreMumpsCall(inner, outer, MumpsScalar);
500:   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501:   PostMumpsCall(inner, outer);
502: #endif
503:   PetscFunctionReturn(PETSC_SUCCESS);
504: }
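
// Typical call pattern (illustrative sketch): set the relevant fields on the outer id, call MUMPS through
// the wrapper, then check the global status INFOG(1), where negative values signal an error:
//
//   mumps->id.job = JOB_FACTNUMERIC;
//   PetscMUMPS_c(mumps); // macro defined below; adds FP-trap handling and optional OpenMP support
//   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));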

506: /* macros s.t. indices match MUMPS documentation */
507: #define ICNTL(I) icntl[(I) - 1]
508: #define INFOG(I) infog[(I) - 1]
509: #define INFO(I)  info[(I) - 1]

 511: // Get a value from a MumpsScalar array, which is the <F> field in the MUMPS_STRUC_C struct. The value is convertible to PetscScalar. Note there is no minus 1 on I!
512: #if defined(PETSC_USE_COMPLEX)
513:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514: #else
515:   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516: #endif

518: // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519: #define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520: #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521: #define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

 523: // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
524: #define ID_CNTL_SET(ID, I, VAL) \
525:   do { \
526:     if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527:     else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528:   } while (0)
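
// Example (illustrative): these accessors read/write MumpsReal-valued entries without the caller knowing
// which precision the internal id was created with.
//
//   PetscReal pivtol = (PetscReal)ID_CNTL_GET(mumps->id, 1);   // CNTL(1): relative threshold for numerical pivoting
//   ID_CNTL_SET(mumps->id, 1, pivtol);
//   PetscReal flops  = (PetscReal)ID_RINFOG_GET(mumps->id, 1); // RINFOG(1): estimated flops for the elimination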

530: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
531: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532:   #define PetscMUMPS_c(mumps) \
533:     do { \
534:       if (mumps->use_petsc_omp_support) { \
535:         if (mumps->is_omp_master) { \
536:           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538:           PetscCall(PetscCallMumps_Private(&mumps->id)); \
539:           PetscCall(PetscFPTrapPop()); \
540:           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541:         } \
542:         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543:         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
544:          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
545:          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
546:          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
547:       */ \
548:         MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549:         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550:         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551:         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552:         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553:       } else { \
554:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555:         PetscCall(PetscCallMumps_Private(&mumps->id)); \
556:         PetscCall(PetscFPTrapPop()); \
557:       } \
558:     } while (0)
559: #else
560:   #define PetscMUMPS_c(mumps) \
561:     do { \
562:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563:       PetscCall(PetscCallMumps_Private(&mumps->id)); \
564:       PetscCall(PetscFPTrapPop()); \
565:     } while (0)
566: #endif

568: typedef struct Mat_MUMPS Mat_MUMPS;
569: struct Mat_MUMPS {
570:   XMUMPS_STRUC_C id;

572:   MatStructure   matstruc;
573:   PetscMPIInt    myid, petsc_size;
574:   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
575:   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
576:   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
577:   PetscMUMPSInt  sym;
578:   MPI_Comm       mumps_comm;
579:   PetscMUMPSInt *ICNTL_pre;
580:   PetscReal     *CNTL_pre;
581:   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
582:   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
583:   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
584:   PetscMUMPSInt  ICNTL26;
585:   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
586: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587:   PetscInt    *rhs_nrow, max_nrhs;
588:   PetscMPIInt *rhs_recvcounts, *rhs_disps;
589:   PetscScalar *rhs_loc, *rhs_recvbuf;
590: #endif
591:   Vec            b_seq, x_seq;
592:   PetscInt       ninfo, *info; /* which INFO to display */
593:   PetscInt       sizeredrhs;
594:   PetscScalar   *schur_sol;
595:   PetscInt       schur_sizesol;
596:   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
597:   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598:   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
599:   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

601:   /* Support for MATNEST */
602:   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603:   PetscCount  *nest_vals_start;
604:   PetscScalar *nest_vals;

 606:   /* stuff used by PETSc/MUMPS OpenMP support */
607:   PetscBool    use_petsc_omp_support;
 608:   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller through which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
609:   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610:   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
611:   PetscMPIInt  tag, omp_comm_size;
612:   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
613:   MPI_Request *reqs;
614: };

616: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617:    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
618:  */
619: static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620: {
621:   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

623:   PetscFunctionBegin;
624: #if defined(PETSC_USE_64BIT_INDICES)
625:   {
626:     PetscInt i;
627:     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
628:       PetscCall(PetscFree(mumps->ia_alloc));
629:       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
630:       mumps->cur_ilen = nrow + 1;
631:     }
632:     if (nnz > mumps->cur_jlen) {
633:       PetscCall(PetscFree(mumps->ja_alloc));
634:       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
635:       mumps->cur_jlen = nnz;
636:     }
637:     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
638:     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
639:     *ia_mumps = mumps->ia_alloc;
640:     *ja_mumps = mumps->ja_alloc;
641:   }
642: #else
643:   *ia_mumps = ia;
644:   *ja_mumps = ja;
645: #endif
646:   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }
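
// Worked example (illustrative): a dense 2x2 matrix stored as 1-based CSR, the layout this routine expects:
//   nrow = 2, ia[] = {1, 3, 5}, ja[] = {1, 2, 1, 2}, so nnz = ia[nrow] - 1 = 4.
// With 64-bit PetscInt the indices are copied element-wise into the PetscMUMPSInt work arrays ia_alloc/ja_alloc;
// with 32-bit PetscInt, ia/ja are passed through unchanged.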

650: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651: {
652:   PetscFunctionBegin;
653:   PetscCall(PetscFree(mumps->id.listvar_schur));
654:   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655:   PetscCall(PetscFree(mumps->schur_sol));
656:   mumps->id.size_schur = 0;
657:   mumps->id.schur_lld  = 0;
 658:   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is not yet built
659:   PetscFunctionReturn(PETSC_SUCCESS);
660: }

662: /* solve with rhs in mumps->id.redrhs and return in the same location */
663: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
664: {
665:   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
666:   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
667:   MatFactorSchurStatus schurstatus;
668:   PetscInt             sizesol;
669:   const PetscScalar   *xarray;

671:   PetscFunctionBegin;
672:   PetscCall(MatFactorFactorizeSchurComplement(F));
673:   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
674:   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

676:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
677:   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
678: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
679:   PetscCall(MatBindToCPU(B, S->boundtocpu));
680: #endif
681:   switch (schurstatus) {
682:   case MAT_FACTOR_SCHUR_FACTORED:
683:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
684:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
685: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
686:     PetscCall(MatBindToCPU(X, S->boundtocpu));
687: #endif
688:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
689:       PetscCall(MatMatSolveTranspose(S, B, X));
690:     } else {
691:       PetscCall(MatMatSolve(S, B, X));
692:     }
693:     break;
694:   case MAT_FACTOR_SCHUR_INVERTED:
695:     sizesol = mumps->id.nrhs * mumps->id.size_schur;
696:     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
697:       PetscCall(PetscFree(mumps->schur_sol));
698:       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
699:       mumps->schur_sizesol = sizesol;
700:     }
701:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
702:     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
703: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
704:     PetscCall(MatBindToCPU(X, S->boundtocpu));
705: #endif
706:     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
707:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
708:       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
709:     } else {
710:       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
711:     }
712:     PetscCall(MatProductSetFromOptions(X));
713:     PetscCall(MatProductSymbolic(X));
714:     PetscCall(MatProductNumeric(X));

716:     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
717:     break;
718:   default:
719:     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
720:   }
721:   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
722:   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
723:   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
724:   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
725:   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
726:   PetscCall(MatDestroy(&B));
727:   PetscCall(MatDestroy(&X));
728:   PetscFunctionReturn(PETSC_SUCCESS);
729: }

731: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
732: {
733:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

735:   PetscFunctionBegin;
736:   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
737:     PetscFunctionReturn(PETSC_SUCCESS);
738:   }
739:   if (!expansion) { /* prepare for the condensation step */
740:     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
741:     /* allocate MUMPS internal array to store reduced right-hand sides */
742:     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
743:       mumps->id.lredrhs = mumps->id.size_schur;
744:       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
745:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
746:       PetscCall(PetscFree(mumps->redrhs));
747:       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
748:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
749:     }
750:   } else {                                    /* prepare for the expansion step */
751:     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
752:     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
753:     PetscMUMPS_c(mumps);
754:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
755:     /* restore defaults */
756:     mumps->id.ICNTL(26) = -1;
757:     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
758:     if (mumps->id.nrhs > 1) {
759:       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
760:       PetscCall(PetscFree(mumps->redrhs));
761:       mumps->id.redrhs_len = 0;
762:       mumps->id.lredrhs    = 0;
763:       mumps->sizeredrhs    = 0;
764:     }
765:   }
766:   PetscFunctionReturn(PETSC_SUCCESS);
767: }

769: /*
770:   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

772:   input:
773:     A       - matrix in aij,baij or sbaij format
774:     shift   - 0: C style output triple; 1: Fortran style output triple.
775:     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776:               MAT_REUSE_MATRIX:   only the values in v array are updated
777:   output:
778:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779:     r, c, v - row and col index, matrix values (matrix triples)

 781:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
 782:   freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
 783:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

785:  */
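
/*
  Worked example (illustrative): with shift = 1 (Fortran-style indexing, as MUMPS expects), the 2x2 matrix
     [2 3]
     [0 5]
  stored in AIJ format is converted to the triples
     r = {1, 1, 2}, c = {1, 2, 2}, v = {2, 3, 5}, with nnz = 3.
*/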

787: static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788: {
789:   const PetscScalar *av;
790:   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791:   PetscCount         nz, rnz, k;
792:   PetscMUMPSInt     *row, *col;
793:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

795:   PetscFunctionBegin;
796:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797:   if (reuse == MAT_INITIAL_MATRIX) {
798:     nz = aa->nz;
799:     ai = aa->i;
800:     aj = aa->j;
801:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802:     for (PetscCount i = k = 0; i < M; i++) {
803:       rnz = ai[i + 1] - ai[i];
804:       ajj = aj + ai[i];
805:       for (PetscCount j = 0; j < rnz; j++) {
806:         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808:         k++;
809:       }
810:     }
811:     mumps->val = (PetscScalar *)av;
812:     mumps->irn = row;
813:     mumps->jcn = col;
814:     mumps->nnz = nz;
815:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816:   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818:   PetscFunctionReturn(PETSC_SUCCESS);
819: }

821: static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
822: {
823:   PetscCount     nz, i, j, k, r;
824:   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
825:   PetscMUMPSInt *row, *col;

827:   PetscFunctionBegin;
828:   nz = a->sliidx[a->totalslices];
829:   if (reuse == MAT_INITIAL_MATRIX) {
830:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
831:     for (i = k = 0; i < a->totalslices; i++) {
832:       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
833:     }
834:     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
835:     mumps->irn = row;
836:     mumps->jcn = col;
837:     mumps->nnz = nz;
838:     mumps->val = a->val;
839:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
840:   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845: {
846:   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
847:   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848:   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849:   PetscInt        bs;
850:   PetscMUMPSInt  *row, *col;

852:   PetscFunctionBegin;
853:   if (reuse == MAT_INITIAL_MATRIX) {
854:     PetscCall(MatGetBlockSize(A, &bs));
855:     M  = A->rmap->N / bs;
856:     ai = aa->i;
857:     aj = aa->j;
858:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
859:     for (i = 0; i < M; i++) {
860:       ajj = aj + ai[i];
861:       rnz = ai[i + 1] - ai[i];
862:       for (k = 0; k < rnz; k++) {
863:         for (j = 0; j < bs; j++) {
864:           for (m = 0; m < bs; m++) {
865:             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866:             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867:             idx++;
868:           }
869:         }
870:       }
871:     }
872:     mumps->irn = row;
873:     mumps->jcn = col;
874:     mumps->nnz = nz;
875:     mumps->val = aa->a;
876:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877:   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878:   PetscFunctionReturn(PETSC_SUCCESS);
879: }

881: static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882: {
883:   const PetscInt *ai, *aj, *ajj;
884:   PetscInt        bs;
885:   PetscCount      nz, rnz, i, j, k, m;
886:   PetscMUMPSInt  *row, *col;
887:   PetscScalar    *val;
888:   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889:   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890: #if defined(PETSC_USE_COMPLEX)
891:   PetscBool isset, hermitian;
892: #endif

894:   PetscFunctionBegin;
895: #if defined(PETSC_USE_COMPLEX)
896:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
 897:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898: #endif
899:   ai = aa->i;
900:   aj = aa->j;
901:   PetscCall(MatGetBlockSize(A, &bs));
902:   if (reuse == MAT_INITIAL_MATRIX) {
903:     const PetscCount alloc_size = aa->nz * bs2;

905:     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906:     if (bs > 1) {
907:       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908:       mumps->val = mumps->val_alloc;
909:     } else {
910:       mumps->val = aa->a;
911:     }
912:     mumps->irn = row;
913:     mumps->jcn = col;
914:   } else {
915:     row = mumps->irn;
916:     col = mumps->jcn;
917:   }
918:   val = mumps->val;

920:   nz = 0;
921:   if (bs > 1) {
922:     for (i = 0; i < mbs; i++) {
923:       rnz = ai[i + 1] - ai[i];
924:       ajj = aj + ai[i];
925:       for (j = 0; j < rnz; j++) {
926:         for (k = 0; k < bs; k++) {
927:           for (m = 0; m < bs; m++) {
928:             if (ajj[j] > i || k >= m) {
929:               if (reuse == MAT_INITIAL_MATRIX) {
930:                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931:                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932:               }
933:               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934:             }
935:           }
936:         }
937:       }
938:     }
939:   } else if (reuse == MAT_INITIAL_MATRIX) {
940:     for (i = 0; i < mbs; i++) {
941:       rnz = ai[i + 1] - ai[i];
942:       ajj = aj + ai[i];
943:       for (j = 0; j < rnz; j++) {
944:         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945:         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946:         nz++;
947:       }
948:     }
949:     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950:   } else if (mumps->nest_vals)
951:     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952:   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954:   PetscFunctionReturn(PETSC_SUCCESS);
955: }

957: static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958: {
959:   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960:   PetscCount         nz, rnz, i, j;
961:   const PetscScalar *av, *v1;
962:   PetscScalar       *val;
963:   PetscMUMPSInt     *row, *col;
964:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965:   PetscBool          diagDense;
966: #if defined(PETSC_USE_COMPLEX)
967:   PetscBool hermitian, isset;
968: #endif

970:   PetscFunctionBegin;
971: #if defined(PETSC_USE_COMPLEX)
972:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
 973:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
974: #endif
975:   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976:   ai = aa->i;
977:   aj = aa->j;
978:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979:   if (reuse == MAT_INITIAL_MATRIX) {
980:     /* count nz in the upper triangular part of A */
981:     nz = 0;
982:     if (!diagDense) {
983:       for (i = 0; i < M; i++) {
984:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985:           for (j = ai[i]; j < ai[i + 1]; j++) {
986:             if (aj[j] < i) continue;
987:             nz++;
988:           }
989:         } else {
990:           nz += ai[i + 1] - adiag[i];
991:         }
992:       }
993:     } else {
994:       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995:     }
996:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997:     PetscCall(PetscMalloc1(nz, &val));
998:     mumps->nnz = nz;
999:     mumps->irn = row;
1000:     mumps->jcn = col;
1001:     mumps->val = mumps->val_alloc = val;

1003:     nz = 0;
1004:     if (!diagDense) {
1005:       for (i = 0; i < M; i++) {
1006:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007:           for (j = ai[i]; j < ai[i + 1]; j++) {
1008:             if (aj[j] < i) continue;
1009:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010:             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011:             val[nz] = av[j];
1012:             nz++;
1013:           }
1014:         } else {
1015:           rnz = ai[i + 1] - adiag[i];
1016:           ajj = aj + adiag[i];
1017:           v1  = av + adiag[i];
1018:           for (j = 0; j < rnz; j++) {
1019:             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020:             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021:             val[nz++] = v1[j];
1022:           }
1023:         }
1024:       }
1025:     } else {
1026:       for (i = 0; i < M; i++) {
1027:         rnz = ai[i + 1] - adiag[i];
1028:         ajj = aj + adiag[i];
1029:         v1  = av + adiag[i];
1030:         for (j = 0; j < rnz; j++) {
1031:           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032:           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033:           val[nz++] = v1[j];
1034:         }
1035:       }
1036:     }
1037:   } else {
1038:     nz  = 0;
1039:     val = mumps->val;
1040:     if (!diagDense) {
1041:       for (i = 0; i < M; i++) {
1042:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043:           for (j = ai[i]; j < ai[i + 1]; j++) {
1044:             if (aj[j] < i) continue;
1045:             val[nz++] = av[j];
1046:           }
1047:         } else {
1048:           rnz = ai[i + 1] - adiag[i];
1049:           v1  = av + adiag[i];
1050:           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051:         }
1052:       }
1053:     } else {
1054:       for (i = 0; i < M; i++) {
1055:         rnz = ai[i + 1] - adiag[i];
1056:         v1  = av + adiag[i];
1057:         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058:       }
1059:     }
1060:   }
1061:   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062:   PetscFunctionReturn(PETSC_SUCCESS);
1063: }

1065: static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066: {
1067:   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068:   PetscInt           bs;
1069:   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070:   PetscMUMPSInt     *row, *col;
1071:   const PetscScalar *av, *bv, *v1, *v2;
1072:   PetscScalar       *val;
1073:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076:   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077: #if defined(PETSC_USE_COMPLEX)
1078:   PetscBool hermitian, isset;
1079: #endif

1081:   PetscFunctionBegin;
1082: #if defined(PETSC_USE_COMPLEX)
1083:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1085: #endif
1086:   PetscCall(MatGetBlockSize(A, &bs));
1087:   rstart = A->rmap->rstart;
1088:   ai     = aa->i;
1089:   aj     = aa->j;
1090:   bi     = bb->i;
1091:   bj     = bb->j;
1092:   av     = aa->a;
1093:   bv     = bb->a;

1095:   garray = mat->garray;

1097:   if (reuse == MAT_INITIAL_MATRIX) {
1098:     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100:     PetscCall(PetscMalloc1(nz, &val));
1101:     /* cannot determine the exact mumps->nnz yet because SBAIJ blocks are filtered down to their upper-triangular entries below */
1102:     mumps->irn = row;
1103:     mumps->jcn = col;
1104:     mumps->val = mumps->val_alloc = val;
1105:   } else {
1106:     val = mumps->val;
1107:   }

1109:   jj   = 0;
1110:   irow = rstart;
1111:   for (i = 0; i < mbs; i++) {
1112:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113:     countA = ai[i + 1] - ai[i];
1114:     countB = bi[i + 1] - bi[i];
1115:     bjj    = bj + bi[i];
1116:     v1     = av + ai[i] * bs2;
1117:     v2     = bv + bi[i] * bs2;

1119:     if (bs > 1) {
1120:       /* A-part */
1121:       for (j = 0; j < countA; j++) {
1122:         for (k = 0; k < bs; k++) {
1123:           for (m = 0; m < bs; m++) {
1124:             if (rstart + ajj[j] * bs > irow || k >= m) {
1125:               if (reuse == MAT_INITIAL_MATRIX) {
1126:                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127:                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128:               }
1129:               val[jj++] = v1[j * bs2 + m + k * bs];
1130:             }
1131:           }
1132:         }
1133:       }

1135:       /* B-part */
1136:       for (j = 0; j < countB; j++) {
1137:         for (k = 0; k < bs; k++) {
1138:           for (m = 0; m < bs; m++) {
1139:             if (reuse == MAT_INITIAL_MATRIX) {
1140:               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141:               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142:             }
1143:             val[jj++] = v2[j * bs2 + m + k * bs];
1144:           }
1145:         }
1146:       }
1147:     } else {
1148:       /* A-part */
1149:       for (j = 0; j < countA; j++) {
1150:         if (reuse == MAT_INITIAL_MATRIX) {
1151:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152:           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153:         }
1154:         val[jj++] = v1[j];
1155:       }

1157:       /* B-part */
1158:       for (j = 0; j < countB; j++) {
1159:         if (reuse == MAT_INITIAL_MATRIX) {
1160:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162:         }
1163:         val[jj++] = v2[j];
1164:       }
1165:     }
1166:     irow += bs;
1167:   }
1168:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169:   PetscFunctionReturn(PETSC_SUCCESS);
1170: }

1172: static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173: {
1174:   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175:   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176:   PetscMUMPSInt     *row, *col;
1177:   const PetscScalar *av, *bv, *v1, *v2;
1178:   PetscScalar       *val;
1179:   Mat                Ad, Ao;
1180:   Mat_SeqAIJ        *aa;
1181:   Mat_SeqAIJ        *bb;

1183:   PetscFunctionBegin;
1184:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1188:   aa = (Mat_SeqAIJ *)Ad->data;
1189:   bb = (Mat_SeqAIJ *)Ao->data;
1190:   ai = aa->i;
1191:   aj = aa->j;
1192:   bi = bb->i;
1193:   bj = bb->j;

1195:   rstart = A->rmap->rstart;
1196:   cstart = A->cmap->rstart;

1198:   if (reuse == MAT_INITIAL_MATRIX) {
1199:     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201:     PetscCall(PetscMalloc1(nz, &val));
1202:     mumps->nnz = nz;
1203:     mumps->irn = row;
1204:     mumps->jcn = col;
1205:     mumps->val = mumps->val_alloc = val;
1206:   } else {
1207:     val = mumps->val;
1208:   }

1210:   jj   = 0;
1211:   irow = rstart;
1212:   for (i = 0; i < m; i++) {
1213:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1214:     countA = ai[i + 1] - ai[i];
1215:     countB = bi[i + 1] - bi[i];
1216:     bjj    = bj + bi[i];
1217:     v1     = av + ai[i];
1218:     v2     = bv + bi[i];

1220:     /* A-part */
1221:     for (j = 0; j < countA; j++) {
1222:       if (reuse == MAT_INITIAL_MATRIX) {
1223:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224:         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225:       }
1226:       val[jj++] = v1[j];
1227:     }

1229:     /* B-part */
1230:     for (j = 0; j < countB; j++) {
1231:       if (reuse == MAT_INITIAL_MATRIX) {
1232:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233:         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234:       }
1235:       val[jj++] = v2[j];
1236:     }
1237:     irow++;
1238:   }
1239:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241:   PetscFunctionReturn(PETSC_SUCCESS);
1242: }

1244: static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1245: {
1246:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1247:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1248:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1249:   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1250:   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1251:   const PetscInt     bs2 = mat->bs2;
1252:   PetscInt           bs;
1253:   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1254:   PetscMUMPSInt     *row, *col;
1255:   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1256:   PetscScalar       *val;

1258:   PetscFunctionBegin;
1259:   PetscCall(MatGetBlockSize(A, &bs));
1260:   if (reuse == MAT_INITIAL_MATRIX) {
1261:     nz = bs2 * (aa->nz + bb->nz);
1262:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1263:     PetscCall(PetscMalloc1(nz, &val));
1264:     mumps->nnz = nz;
1265:     mumps->irn = row;
1266:     mumps->jcn = col;
1267:     mumps->val = mumps->val_alloc = val;
1268:   } else {
1269:     val = mumps->val;
1270:   }

1272:   jj   = 0;
1273:   irow = rstart;
1274:   for (i = 0; i < mbs; i++) {
1275:     countA = ai[i + 1] - ai[i];
1276:     countB = bi[i + 1] - bi[i];
1277:     ajj    = aj + ai[i];
1278:     bjj    = bj + bi[i];
1279:     v1     = av + bs2 * ai[i];
1280:     v2     = bv + bs2 * bi[i];

1282:     idx = 0;
1283:     /* A-part */
1284:     for (k = 0; k < countA; k++) {
1285:       for (j = 0; j < bs; j++) {
1286:         for (n = 0; n < bs; n++) {
1287:           if (reuse == MAT_INITIAL_MATRIX) {
1288:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1289:             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1290:           }
1291:           val[jj++] = v1[idx++];
1292:         }
1293:       }
1294:     }

1296:     idx = 0;
1297:     /* B-part */
1298:     for (k = 0; k < countB; k++) {
1299:       for (j = 0; j < bs; j++) {
1300:         for (n = 0; n < bs; n++) {
1301:           if (reuse == MAT_INITIAL_MATRIX) {
1302:             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1303:             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1304:           }
1305:           val[jj++] = v2[idx++];
1306:         }
1307:       }
1308:     }
1309:     irow += bs;
1310:   }
1311:   PetscFunctionReturn(PETSC_SUCCESS);
1312: }
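/*
  Note on the ordering above (derived directly from the loops): within each bs x bs block k, the value v1[n + bs*j]
  is paired with the global position (irow + n, cstart + bs*ajj[k] + j), i.e. the inner index n (row within the
  block) varies fastest. For bs = 2 a block is therefore emitted as (r,c), (r+1,c), (r,c+1), (r+1,c+1).
*/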

1314: static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315: {
1316:   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317:   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318:   PetscMUMPSInt     *row, *col;
1319:   const PetscScalar *av, *bv, *v1, *v2;
1320:   PetscScalar       *val;
1321:   Mat                Ad, Ao;
1322:   Mat_SeqAIJ        *aa;
1323:   Mat_SeqAIJ        *bb;
1324: #if defined(PETSC_USE_COMPLEX)
1325:   PetscBool hermitian, isset;
1326: #endif

1328:   PetscFunctionBegin;
1329: #if defined(PETSC_USE_COMPLEX)
1330:   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1332: #endif
1333:   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334:   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335:   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

1337:   aa = (Mat_SeqAIJ *)Ad->data;
1338:   bb = (Mat_SeqAIJ *)Ao->data;
1339:   ai = aa->i;
1340:   aj = aa->j;
1341:   bi = bb->i;
1342:   bj = bb->j;
1343:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344:   rstart = A->rmap->rstart;
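  /* Only the upper triangular part is passed to MUMPS for Cholesky: in the diagonal block Ad each row starts at
     its diagonal entry (adiag[i]); in the off-diagonal block Ao every column lies outside the locally owned square
     diagonal block, so a global column index > rstart is necessarily >= rend and hence above the diagonal, which
     is what the test garray[bjj[j]] > rstart selects in the two loops below. */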

1346:   if (reuse == MAT_INITIAL_MATRIX) {
1347:     nza = 0; /* num of upper triangular entries in the diagonal block Ad, including diagonals */
1348:     nzb = 0; /* num of upper triangular entries in the off-diagonal block Ao */
1349:     for (i = 0; i < m; i++) {
1350:       nza += (ai[i + 1] - adiag[i]);
1351:       countB = bi[i + 1] - bi[i];
1352:       bjj    = bj + bi[i];
1353:       for (j = 0; j < countB; j++) {
1354:         if (garray[bjj[j]] > rstart) nzb++;
1355:       }
1356:     }

1358:     nz = nza + nzb; /* total nz of upper triangular part of mat */
1359:     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360:     PetscCall(PetscMalloc1(nz, &val));
1361:     mumps->nnz = nz;
1362:     mumps->irn = row;
1363:     mumps->jcn = col;
1364:     mumps->val = mumps->val_alloc = val;
1365:   } else {
1366:     val = mumps->val;
1367:   }

1369:   jj   = 0;
1370:   irow = rstart;
1371:   for (i = 0; i < m; i++) {
1372:     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373:     v1     = av + adiag[i];
1374:     countA = ai[i + 1] - adiag[i];
1375:     countB = bi[i + 1] - bi[i];
1376:     bjj    = bj + bi[i];
1377:     v2     = bv + bi[i];

1379:     /* A-part */
1380:     for (j = 0; j < countA; j++) {
1381:       if (reuse == MAT_INITIAL_MATRIX) {
1382:         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383:         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384:       }
1385:       val[jj++] = v1[j];
1386:     }

1388:     /* B-part */
1389:     for (j = 0; j < countB; j++) {
1390:       if (garray[bjj[j]] > rstart) {
1391:         if (reuse == MAT_INITIAL_MATRIX) {
1392:           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393:           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394:         }
1395:         val[jj++] = v2[j];
1396:       }
1397:     }
1398:     irow++;
1399:   }
1400:   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401:   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402:   PetscFunctionReturn(PETSC_SUCCESS);
1403: }

1405: static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406: {
1407:   const PetscScalar *av;
1408:   const PetscInt     M = A->rmap->n;
1409:   PetscCount         i;
1410:   PetscMUMPSInt     *row, *col;
1411:   Vec                v;

1413:   PetscFunctionBegin;
1414:   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415:   PetscCall(VecGetArrayRead(v, &av));
1416:   if (reuse == MAT_INITIAL_MATRIX) {
1417:     PetscCall(PetscMalloc2(M, &row, M, &col));
1418:     for (i = 0; i < M; i++) {
1419:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420:       col[i] = row[i];
1421:     }
1422:     mumps->val = (PetscScalar *)av;
1423:     mumps->irn = row;
1424:     mumps->jcn = col;
1425:     mumps->nnz = M;
1426:   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427:   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428:   PetscCall(VecRestoreArrayRead(v, &av));
1429:   PetscFunctionReturn(PETSC_SUCCESS);
1430: }

1432: static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1433: {
1434:   PetscScalar   *v;
1435:   const PetscInt m = A->rmap->n, N = A->cmap->N;
1436:   PetscInt       lda;
1437:   PetscCount     i, j;
1438:   PetscMUMPSInt *row, *col;

1440:   PetscFunctionBegin;
1441:   PetscCall(MatDenseGetArray(A, &v));
1442:   PetscCall(MatDenseGetLDA(A, &lda));
1443:   if (reuse == MAT_INITIAL_MATRIX) {
1444:     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1445:     for (i = 0; i < m; i++) {
1446:       col[i] = 0;
1447:       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1448:     }
1449:     for (j = 1; j < N; j++) {
1450:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1451:       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1452:     }
1453:     if (lda == m) mumps->val = v;
1454:     else {
1455:       PetscCall(PetscMalloc1(m * N, &mumps->val));
1456:       mumps->val_alloc = mumps->val;
1457:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1458:     }
1459:     mumps->irn = row;
1460:     mumps->jcn = col;
1461:     mumps->nnz = m * N;
1462:   } else {
1463:     if (lda == m && !mumps->nest_vals) mumps->val = v;
1464:     else {
1465:       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1466:     }
1467:   }
1468:   PetscCall(MatDenseRestoreArray(A, &v));
1469:   PetscFunctionReturn(PETSC_SUCCESS);
1470: }
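/*
  Illustration (hypothetical numbers): a rank owning m = 2 rows starting at rstart = 4 of a dense matrix with N = 3
  global columns emits the m*N positions (4,0) (5,0) (4,1) (5,1) (4,2) (5,2) with the values taken column by column
  (the shift argument is unused here; the MATNEST path adds its own shift when merging the per-block triples). When
  the leading dimension lda differs from m, the columns are first compacted into a contiguous buffer.
*/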

1472: // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function walks up the chain of parent Mats until the parent
1473: // is itself neither MATTRANSPOSEVIRTUAL nor MATHERMITIANTRANSPOSEVIRTUAL, and returns the accumulated shift and scaling together with flags indicating
1474: // whether the parent Mat should be conjugated and whether its rows and columns should be swapped
1475: // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1476: static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477: {
1478:   Mat         A;
1479:   PetscScalar s[2];
1480:   PetscBool   isTrans, isHTrans, compare;

1482:   PetscFunctionBegin;
1483:   do {
1484:     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485:     if (isTrans) {
1486:       PetscCall(MatTransposeGetMat(*sub, &A));
1487:       isHTrans = PETSC_FALSE;
1488:     } else {
1489:       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490:       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491:     }
1492:     compare = (PetscBool)(isTrans || isHTrans);
1493:     if (compare) {
1494:       if (vshift && vscale) {
1495:         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496:         if (!*conjugate) {
1497:           *vshift += s[0] * *vscale;
1498:           *vscale *= s[1];
1499:         } else {
1500:           *vshift += PetscConj(s[0]) * *vscale;
1501:           *vscale *= PetscConj(s[1]);
1502:         }
1503:       }
1504:       if (swap) *swap = (PetscBool)!*swap;
1505:       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506:       *sub = A;
1507:     }
1508:   } while (compare);
1509:   PetscFunctionReturn(PETSC_SUCCESS);
1510: }
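/*
  The accumulation above can be summarized as follows (derived from the update rules, assuming the MatShell
  convention that each virtual (Hermitian-)transpose level represents  sub = s1 * op(parent) + s0 * I  with op
  being ^T or ^H): once every level has been unwrapped, the original matrix equals

     vscale * C(P(A)) + vshift * I

  where A is the final parent, P swaps rows and columns when *swap is true, and C conjugates the entries of A when
  *conjugate is true. Note that vscale and vshift themselves are not conjugated again, which matches the way the
  values are rescaled in MatConvertToTriples_nest_xaij() below.
*/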

1512: static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1513: {
1514:   Mat     **mats;
1515:   PetscInt  nr, nc;
1516:   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

1518:   PetscFunctionBegin;
1519:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1520:   if (reuse == MAT_INITIAL_MATRIX) {
1521:     PetscMUMPSInt *irns, *jcns;
1522:     PetscScalar   *vals;
1523:     PetscCount     totnnz, cumnnz, maxnnz;
1524:     PetscInt      *pjcns_w, Mbs = 0;
1525:     IS            *rows, *cols;
1526:     PetscInt     **rows_idx, **cols_idx;

1528:     cumnnz = 0;
1529:     maxnnz = 0;
1530:     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1531:     for (PetscInt r = 0; r < nr; r++) {
1532:       for (PetscInt c = 0; c < nc; c++) {
1533:         Mat sub = mats[r][c];

1535:         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1536:         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1537:         if (sub) {
1538:           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1539:           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1540:           MatInfo   info;

1542:           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1543:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1544:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1545:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1546:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1547:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1548:           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1549:           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1550:           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

1552:           if (chol) {
1553:             if (r == c) {
1554:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1555:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1556:               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1557:               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1558:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560:             } else {
1561:               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1562:               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1563:               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1564:               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1565:               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1566:               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1567:             }
1568:           } else {
1569:             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1570:             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1571:             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1572:             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1573:             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1574:             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1575:           }
1576:           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1577:           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1578:           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1579:           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1580:           maxnnz = PetscMax(maxnnz, info.nz_used);
1581:         }
1582:       }
1583:     }

1585:     /* Allocate total COO */
1586:     totnnz = cumnnz;
1587:     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1588:     PetscCall(PetscMalloc1(totnnz, &vals));

1590:     /* Handle row and column maps
1591:        We directly map rows and use an SF for the columns */
1592:     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1593:     PetscCall(MatNestGetISs(A, rows, cols));
1594:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1595:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1596:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1597:     else (void)maxnnz;

1599:     cumnnz = 0;
1600:     for (PetscInt r = 0; r < nr; r++) {
1601:       for (PetscInt c = 0; c < nc; c++) {
1602:         Mat             sub    = mats[r][c];
1603:         const PetscInt *ridx   = rows_idx[r];
1604:         const PetscInt *cidx   = cols_idx[c];
1605:         PetscScalar     vscale = 1.0, vshift = 0.0;
1606:         PetscInt        rst, size, bs;
1607:         PetscSF         csf;
1608:         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1609:         PetscLayout     cmap;
1610:         PetscInt        innz;

1612:         mumps->nest_vals_start[r * nc + c] = cumnnz;
1613:         if (c == r) {
1614:           PetscCall(ISGetSize(rows[r], &size));
1615:           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1616:             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1617:           }
1618:           PetscCall(MatGetBlockSize(sub, &bs));
1619:           Mbs += size / bs;
1620:         }
1621:         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

1623:         /* Extract inner blocks if needed */
1624:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1625:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

1627:         /* Get column layout to map off-process columns */
1628:         PetscCall(MatGetLayouts(sub, NULL, &cmap));

1630:         /* Get row start to map on-process rows */
1631:         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

1633:         /* Directly use the mumps datastructure and use C ordering for now */
1634:         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

1636:         /* Swap the role of rows and columns indices for transposed blocks
1637:            since we need values with global final ordering */
1638:         if (swap) {
1639:           cidx = rows_idx[r];
1640:           ridx = cols_idx[c];
1641:         }

1643:         /* Communicate column indices
1644:            This could have been done with a single SF, but it would have complicated the code a lot;
1645:            since we do it only once, we pay the price of setting up an SF for each block */
1646:         if (PetscDefined(USE_64BIT_INDICES)) {
1647:           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1648:         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1649:         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1650:         PetscCall(PetscIntCast(mumps->nnz, &innz));
1651:         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1652:         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653:         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1654:         PetscCall(PetscSFDestroy(&csf));

1656:         /* Import indices: use direct map for rows and mapped indices for columns */
1657:         if (swap) {
1658:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1659:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1660:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1661:           }
1662:         } else {
1663:           for (PetscInt k = 0; k < mumps->nnz; k++) {
1664:             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1665:             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1666:           }
1667:         }

1669:         /* Import values to full COO */
1670:         if (conjugate) { /* conjugate the entries */
1671:           PetscScalar *v = vals + cumnnz;
1672:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1673:         } else if (vscale != 1.0) {
1674:           PetscScalar *v = vals + cumnnz;
1675:           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1676:         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

1678:         /* Shift new starting point and sanity check */
1679:         cumnnz += mumps->nnz;
1680:         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

1682:         /* Free scratch memory */
1683:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1684:         PetscCall(PetscFree(mumps->val_alloc));
1685:         mumps->val = NULL;
1686:         mumps->nnz = 0;
1687:       }
1688:     }
1689:     if (mumps->id.ICNTL(15) == 1) {
1690:       if (Mbs != A->rmap->N) {
1691:         PetscMPIInt rank, size;

1693:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1694:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1695:         if (rank == 0) {
1696:           PetscInt shift = 0;

1698:           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1699:           PetscCall(PetscFree(mumps->id.blkptr));
1700:           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1701:           mumps->id.blkptr[0] = 1;
1702:           for (PetscInt i = 0; i < size; ++i) {
1703:             for (PetscInt r = 0; r < nr; r++) {
1704:               Mat             sub = mats[r][r];
1705:               const PetscInt *ranges;
1706:               PetscInt        bs;

1708:               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1709:               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1710:               PetscCall(MatGetBlockSize(sub, &bs));
1711:               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1712:               shift += (ranges[i + 1] - ranges[i]) / bs;
1713:             }
1714:           }
1715:         }
1716:       } else mumps->id.ICNTL(15) = 0;
1717:     }
1718:     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1719:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1720:     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1721:     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1722:     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1723:     mumps->nest_vals_start[nr * nc] = cumnnz;

1725:     /* Set pointers for final MUMPS data structure */
1726:     mumps->nest_vals = vals;
1727:     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1728:     mumps->val       = vals;
1729:     mumps->irn       = irns;
1730:     mumps->jcn       = jcns;
1731:     mumps->nnz       = cumnnz;
1732:   } else {
1733:     PetscScalar *oval = mumps->nest_vals;
1734:     for (PetscInt r = 0; r < nr; r++) {
1735:       for (PetscInt c = 0; c < nc; c++) {
1736:         PetscBool   conjugate = PETSC_FALSE;
1737:         Mat         sub       = mats[r][c];
1738:         PetscScalar vscale = 1.0, vshift = 0.0;
1739:         PetscInt    midx = r * nc + c;

1741:         if (!mumps->nest_convert_to_triples[midx]) continue;
1742:         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1743:         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1744:         mumps->val = oval + mumps->nest_vals_start[midx];
1745:         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1746:         if (conjugate) {
1747:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1748:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1749:         } else if (vscale != 1.0) {
1750:           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1751:           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1752:         }
1753:       }
1754:     }
1755:     mumps->val = oval;
1756:   }
1757:   PetscFunctionReturn(PETSC_SUCCESS);
1758: }
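/*
  Minimal usage sketch (not taken from this file; A00, A01, A10, A11, b and x are assumed to exist, and the sketch
  needs petscksp.h): a MATNEST can be handed directly to MUMPS, in which case MatConvertToTriples_nest_xaij()
  above flattens the blocks into a single COO list.

    Mat blocks[] = {A00, A01, A10, A11}, N;
    KSP ksp;
    PC  pc;
    PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &N));
    PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
    PetscCall(KSPSetOperators(ksp, N, N));
    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCLU));
    PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
    PetscCall(KSPSolve(ksp, b, x));
*/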

1760: static PetscErrorCode MatDestroy_MUMPS(Mat A)
1761: {
1762:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

1764:   PetscFunctionBegin;
1765:   PetscCall(PetscFree(mumps->id.isol_loc));
1766:   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1767:   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1768:   PetscCall(VecDestroy(&mumps->b_seq));
1769:   PetscCall(VecDestroy(&mumps->x_seq));
1770:   PetscCall(PetscFree(mumps->id.perm_in));
1771:   PetscCall(PetscFree(mumps->id.blkvar));
1772:   PetscCall(PetscFree(mumps->id.blkptr));
1773:   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1774:   PetscCall(PetscFree(mumps->val_alloc));
1775:   PetscCall(PetscFree(mumps->info));
1776:   PetscCall(PetscFree(mumps->ICNTL_pre));
1777:   PetscCall(PetscFree(mumps->CNTL_pre));
1778:   PetscCall(MatMumpsResetSchur_Private(mumps));
1779:   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1780:     mumps->id.job = JOB_END;
1781:     PetscMUMPS_c(mumps);
1782:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1783:     if (mumps->mumps_comm != MPI_COMM_NULL) {
1784:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1785:       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1786:     }
1787:   }
1788:   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1789: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1790:   if (mumps->use_petsc_omp_support) {
1791:     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1792:     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1793:     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1794:   }
1795: #endif
1796:   PetscCall(PetscFree(mumps->ia_alloc));
1797:   PetscCall(PetscFree(mumps->ja_alloc));
1798:   PetscCall(PetscFree(mumps->recvcount));
1799:   PetscCall(PetscFree(mumps->reqs));
1800:   PetscCall(PetscFree(mumps->irhs_loc));
1801:   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1802:   PetscCall(PetscFree(mumps->nest_vals));
1803:   PetscCall(PetscFree(A->data));

1805:   /* clear composed functions */
1806:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1807:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1808:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1809:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1810:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1811:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1812:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1813:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1814:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1815:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1816:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1817:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1818:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1819:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1820:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1821:   PetscFunctionReturn(PETSC_SUCCESS);
1822: }

1824: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1825: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1826: {
1827:   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1828:   const PetscMPIInt ompsize = mumps->omp_comm_size;
1829:   PetscInt          i, m, M, rstart;

1831:   PetscFunctionBegin;
1832:   PetscCall(MatGetSize(A, &M, NULL));
1833:   PetscCall(MatGetLocalSize(A, &m, NULL));
1834:   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1835:   if (ompsize == 1) {
1836:     if (!mumps->irhs_loc) {
1837:       mumps->nloc_rhs = (PetscMUMPSInt)m;
1838:       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1839:       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1840:       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1841:     }
1842:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1843:   } else {
1844: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1845:     const PetscInt *ranges;
1846:     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1847:     MPI_Group       petsc_group, omp_group;
1848:     PetscScalar    *recvbuf = NULL;

1850:     if (mumps->is_omp_master) {
1851:       /* Lazily initialize the omp stuff for distributed rhs */
1852:       if (!mumps->irhs_loc) {
1853:         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1854:         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1855:         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1856:         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1857:         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1858:         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

1860:         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1861:         mumps->nloc_rhs = 0;
1862:         PetscCall(MatGetOwnershipRanges(A, &ranges));
1863:         for (j = 0; j < ompsize; j++) {
1864:           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1865:           mumps->nloc_rhs += mumps->rhs_nrow[j];
1866:         }
1867:         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1868:         for (j = k = 0; j < ompsize; j++) {
1869:           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1870:         }

1872:         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1873:         PetscCallMPI(MPI_Group_free(&petsc_group));
1874:         PetscCallMPI(MPI_Group_free(&omp_group));
1875:       }

1877:       /* Realloc buffers when current nrhs is bigger than what we have met */
1878:       if (nrhs > mumps->max_nrhs) {
1879:         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1880:         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1881:         mumps->max_nrhs = nrhs;
1882:       }

1884:       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1885:       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1886:       mumps->rhs_disps[0] = 0;
1887:       for (j = 1; j < ompsize; j++) {
1888:         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1889:         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1890:       }
1891:       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1892:     }

1894:     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1895:     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

1897:     if (mumps->is_omp_master) {
1898:       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1899:         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1900:         for (j = 0; j < ompsize; j++) {
1901:           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1902:           dst                    = dstbase;
1903:           for (i = 0; i < nrhs; i++) {
1904:             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1905:             src += mumps->rhs_nrow[j];
1906:             dst += mumps->nloc_rhs;
1907:           }
1908:           dstbase += mumps->rhs_nrow[j];
1909:         }
1910:       }
1911:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1912:     }
1913: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1914:   }
1915:   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1916:   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1917:   mumps->id.lrhs_loc = mumps->nloc_rhs;
1918:   mumps->id.irhs_loc = mumps->irhs_loc;
1919:   PetscFunctionReturn(PETSC_SUCCESS);
1920: }
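/*
  Illustration of the nrhs > 1 re-arrangement above (hypothetical sizes): with ompsize = 2, rhs_nrow = {2, 3} and
  nrhs = 2, MPI_Gatherv delivers recvbuf = [a11 a21 a12 a22 | b11 b21 b31 b12 b22 b32] (each rank's local columns
  contiguous), whereas MUMPS expects rhs_loc to be column-major over all nloc_rhs = 5 rows, namely
  [a11 a21 b11 b21 b31 | a12 a22 b12 b22 b32]; the copy loop produces exactly this interleaving.
*/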

1922: static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1923: {
1924:   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1925:   const PetscScalar *barray = NULL;
1926:   PetscScalar       *array;
1927:   IS                 is_iden, is_petsc;
1928:   PetscInt           i;
1929:   PetscBool          second_solve = PETSC_FALSE;
1930:   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

1932:   PetscFunctionBegin;
1933:   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1934:                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1935:                                    &cite1));
1936:   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1937:                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1938:                                    &cite2));

1940:   PetscCall(VecFlag(x, A->factorerrortype));
1941:   if (A->factorerrortype) {
1942:     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1943:     PetscFunctionReturn(PETSC_SUCCESS);
1944:   }

1946:   mumps->id.nrhs = 1;
1947:   if (mumps->petsc_size > 1) {
1948:     if (mumps->ICNTL20 == 10) {
1949:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1950:       PetscCall(VecGetArrayRead(b, &barray));
1951:       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1952:     } else {
1953:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
1954:       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1955:       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1956:       if (!mumps->myid) {
1957:         PetscCall(VecGetArray(mumps->b_seq, &array));
1958:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959:       }
1960:     }
1961:   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1962:     mumps->id.ICNTL(20) = 0;
1963:     PetscCall(VecCopy(b, x));
1964:     PetscCall(VecGetArray(x, &array));
1965:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1966:   }

1968:   /*
1969:      handle condensation step of Schur complement (if any)
1970:      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1971:      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1972:      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1973:      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1974:   */
1975:   if (mumps->id.size_schur > 0) {
1976:     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1977:     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1978:       second_solve = PETSC_TRUE;
1979:       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1980:       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1981:     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1982:   }

1984:   mumps->id.job = JOB_SOLVE;
1985:   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1986:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

1988:   /* handle expansion step of Schur complement (if any) */
1989:   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1990:   else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS; solve with the Schur complement and copy the interface solution back
1991:     PetscCall(MatMumpsSolveSchur_Private(A));
1992:     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1993:   }

1995:   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1996:     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1997:       /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so recreate scat_sol */
1998:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1999:     }
2000:     if (!mumps->scat_sol) { /* create scatter scat_sol */
2001:       PetscInt *isol2_loc = NULL;
2002:       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2003:       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2004:       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
2005:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2006:       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2007:       PetscCall(ISDestroy(&is_iden));
2008:       PetscCall(ISDestroy(&is_petsc));
2009:       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2010:     }

2012:     PetscScalar *xarray;
2013:     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2014:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2015:     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2016:     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017:     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

2019:     if (mumps->ICNTL20 == 10) { // distributed RHS
2020:       PetscCall(VecRestoreArrayRead(b, &barray));
2021:     } else if (!mumps->myid) { // centralized RHS
2022:       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2023:     }
2024:   } else {
2025:     // id.rhs has the solution in mumps precision
2026:     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2027:     PetscCall(VecRestoreArray(x, &array));
2028:   }
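  /* MUMPS reports a large INFO(28) as a negative value whose absolute value is the count in millions; the
     expression below undoes that encoding before estimating the solve flops. */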

2030:   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2031:   PetscFunctionReturn(PETSC_SUCCESS);
2032: }
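/*
  Usage sketch (not taken from this file; ksp is assumed to be a KSP configured with PCLU or PCCHOLESKY and
  MATSOLVERMUMPS): after a solve, the MUMPS diagnostics used in this routine can be queried through the factored
  Mat.

    PC       pc;
    Mat      F;
    PetscInt infog1;
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCFactorGetMatrix(pc, &F));
    PetscCall(MatMumpsGetInfog(F, 1, &infog1)); // INFOG(1) < 0 indicates a MUMPS error, as checked above
*/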

2034: static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035: {
2036:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2039:   PetscFunctionBegin;
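  /* MUMPS interprets ICNTL(9) != 1 as "solve the transposed system", so temporarily setting it to 0 turns the
     regular solve below into a transpose solve; the original value is restored afterwards. */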
2040:   mumps->id.ICNTL(9) = 0;
2041:   PetscCall(MatSolve_MUMPS(A, b, x));
2042:   mumps->id.ICNTL(9) = value;
2043:   PetscFunctionReturn(PETSC_SUCCESS);
2044: }

2046: static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2047: {
2048:   Mat                Bt = NULL;
2049:   PetscBool          denseX, denseB, flg, flgT;
2050:   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2051:   PetscInt           i, nrhs, M, nrhsM;
2052:   PetscScalar       *array;
2053:   const PetscScalar *barray;
2054:   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2055:   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2056:   PetscScalar       *sol_loc;
2057:   void              *sol_loc_save;
2058:   PetscCount         sol_loc_len_save;
2059:   IS                 is_to, is_from;
2060:   PetscInt           k, proc, j, m, myrstart;
2061:   const PetscInt    *rstart;
2062:   Vec                v_mpi, msol_loc;
2063:   VecScatter         scat_sol;
2064:   Vec                b_seq;
2065:   VecScatter         scat_rhs;
2066:   PetscScalar       *aa;
2067:   PetscInt           spnr, *ia, *ja;
2068:   Mat_MPIAIJ        *b = NULL;

2070:   PetscFunctionBegin;
2071:   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2072:   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

2074:   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

2076:   if (denseB) {
2077:     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2078:     mumps->id.ICNTL(20) = 0; /* dense RHS */
2079:   } else {                   /* sparse B */
2080:     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2081:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2082:     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2083:     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2084:     /* input B is transpose of actual RHS matrix,
2085:      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2086:     PetscCall(MatTransposeGetMat(B, &Bt));
2087:     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2088:   }

2090:   PetscCall(MatGetSize(B, &M, &nrhs));
2091:   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2092:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2093:   mumps->id.lrhs = (PetscMUMPSInt)M;

2095:   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2096:     PetscScalar *aa;
2097:     PetscInt     spnr, *ia, *ja;
2098:     PetscBool    second_solve = PETSC_FALSE;

2100:     PetscCall(MatDenseGetArray(X, &array));
2101:     if (denseB) {
2102:       /* copy B to X */
2103:       PetscCall(MatDenseGetArrayRead(B, &barray));
2104:       PetscCall(PetscArraycpy(array, barray, nrhsM));
2105:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2106:     } else { /* sparse B */
2107:       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2108:       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2109:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2110:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2111:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2112:     }
2113:     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

2115:     /* handle condensation step of Schur complement (if any) */
2116:     if (mumps->id.size_schur > 0) {
2117:       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2118:         second_solve = PETSC_TRUE;
2119:         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2120:         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
2121:       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2122:     }

2124:     mumps->id.job = JOB_SOLVE;
2125:     PetscMUMPS_c(mumps);
2126:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

2128:     /* handle expansion step of Schur complement (if any) */
2129:     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2130:     else if (mumps->id.ICNTL(26) == 1) { // the solve above only condensed the RHS; solve with the Schur complement and copy the interface solution back
2131:       PetscCall(MatMumpsSolveSchur_Private(A));
2132:       for (j = 0; j < nrhs; ++j)
2133:         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2134:     }

2136:     if (!denseB) { /* sparse B, restore ia, ja */
2137:       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2138:       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2139:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2140:     }

2142:     // whether B is dense or sparse, the solution is in id.rhs; convert it to the array of X.
2143:     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2144:     PetscCall(MatDenseRestoreArray(X, &array));
2145:     PetscFunctionReturn(PETSC_SUCCESS);
2146:   }

2148:   /* parallel case: unless the distributed dense RHS path (ICNTL(20) = 10) is used below, MUMPS requires the rhs B to be centralized on the host! */
2149:   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

2151:   /* create msol_loc to hold mumps local solution */
2152:   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2153:   sol_loc_save          = mumps->id.sol_loc;
2154:   sol_loc_len_save      = mumps->id.sol_loc_len;
2155:   mumps->id.isol_loc    = NULL; // an init state
2156:   mumps->id.sol_loc     = NULL;
2157:   mumps->id.sol_loc_len = 0;

2159:   lsol_loc = mumps->id.lsol_loc;
2160:   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2161:   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2162:   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2163:   mumps->id.isol_loc = isol_loc;

2165:   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

2167:   if (denseB) {
2168:     if (mumps->ICNTL20 == 10) {
2169:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2170:       PetscCall(MatDenseGetArrayRead(B, &barray));
2171:       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2172:       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2173:       PetscCall(MatGetLocalSize(B, &m, NULL));
2174:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2175:     } else {
2176:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2177:       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented as a plain MPI_Gather, resulting in
2178:         very inefficient communication. An optimization would be to use VecScatterCreateToZero to gather B on rank 0 and then, on rank
2179:         0, re-arrange B into the desired order, which is a local operation.
2180:       */

2182:       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2183:       /* wrap dense rhs matrix B into a vector v_mpi */
2184:       PetscCall(MatGetLocalSize(B, &m, NULL));
2185:       PetscCall(MatDenseGetArrayRead(B, &barray));
2186:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2187:       PetscCall(MatDenseRestoreArrayRead(B, &barray));

2189:       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2190:       if (!mumps->myid) {
2191:         PetscInt *idx;
2192:         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2193:         PetscCall(PetscMalloc1(nrhsM, &idx));
2194:         PetscCall(MatGetOwnershipRanges(B, &rstart));
2195:         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2196:           for (j = 0; j < nrhs; j++) {
2197:             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2198:           }
2199:         }

2201:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2202:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2203:         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2204:       } else {
2205:         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2206:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2207:         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2208:       }

2210:       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2211:       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2212:       PetscCall(ISDestroy(&is_to));
2213:       PetscCall(ISDestroy(&is_from));
2214:       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

2216:       if (!mumps->myid) { /* define rhs on the host */
2217:         PetscCall(VecGetArrayRead(b_seq, &barray));
2218:         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2219:         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2220:       }
2221:     }
2222:   } else { /* sparse B */
2223:     b = (Mat_MPIAIJ *)Bt->data;

2225:     /* wrap dense X into a vector v_mpi */
2226:     PetscCall(MatGetLocalSize(X, &m, NULL));
2227:     PetscCall(MatDenseGetArrayRead(X, &barray));
2228:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2229:     PetscCall(MatDenseRestoreArrayRead(X, &barray));

2231:     if (!mumps->myid) {
2232:       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2233:       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2234:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2235:       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2236:       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2237:     } else {
2238:       mumps->id.irhs_ptr    = NULL;
2239:       mumps->id.irhs_sparse = NULL;
2240:       mumps->id.nz_rhs      = 0;
2241:       if (mumps->id.rhs_sparse_len) {
2242:         PetscCall(PetscFree(mumps->id.rhs_sparse));
2243:         mumps->id.rhs_sparse_len = 0;
2244:       }
2245:     }
2246:   }

2248:   /* solve phase */
2249:   mumps->id.job = JOB_SOLVE;
2250:   PetscMUMPS_c(mumps);
2251:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2253:   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2254:   PetscCall(MatDenseGetArray(X, &array));
2255:   PetscCall(VecPlaceArray(v_mpi, array));

2257:   /* create scatter scat_sol */
2258:   PetscCall(MatGetOwnershipRanges(X, &rstart));
2259:   /* iidx: index for scatter mumps solution to PETSc X */

2261:   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2262:   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2263:   for (i = 0; i < lsol_loc; i++) {
2264:     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

2266:     for (proc = 0; proc < mumps->petsc_size; proc++) {
2267:       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2268:         myrstart = rstart[proc];
2269:         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2270:         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2271:         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2272:         break;
2273:       }
2274:     }

2276:     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2277:   }
2278:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2279:   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2280:   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2281:   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2282:   PetscCall(ISDestroy(&is_from));
2283:   PetscCall(ISDestroy(&is_to));
2284:   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2285:   PetscCall(MatDenseRestoreArray(X, &array));

2287:   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2288:     mumps->id.sol_loc_len = 0;
2289:     PetscCall(PetscFree(mumps->id.sol_loc));
2290:   }

2292:   // restore old values
2293:   mumps->id.sol_loc     = sol_loc_save;
2294:   mumps->id.sol_loc_len = sol_loc_len_save;
2295:   mumps->id.isol_loc    = isol_loc_save;

2297:   PetscCall(PetscFree2(sol_loc, isol_loc));
2298:   PetscCall(PetscFree(idxx));
2299:   PetscCall(VecDestroy(&msol_loc));
2300:   PetscCall(VecDestroy(&v_mpi));
2301:   if (!denseB) {
2302:     if (!mumps->myid) {
2303:       b = (Mat_MPIAIJ *)Bt->data;
2304:       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2305:       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2306:       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2307:     }
2308:   } else {
2309:     if (mumps->ICNTL20 == 0) {
2310:       PetscCall(VecDestroy(&b_seq));
2311:       PetscCall(VecScatterDestroy(&scat_rhs));
2312:     }
2313:   }
2314:   PetscCall(VecScatterDestroy(&scat_sol));
2315:   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2316:   PetscFunctionReturn(PETSC_SUCCESS);
2317: }

2319: static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320: {
2321:   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322:   const PetscMUMPSInt value = mumps->id.ICNTL(9);

2324:   PetscFunctionBegin;
2325:   mumps->id.ICNTL(9) = 0;
2326:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327:   mumps->id.ICNTL(9) = value;
2328:   PetscFunctionReturn(PETSC_SUCCESS);
2329: }

2331: static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332: {
2333:   PetscBool flg;
2334:   Mat       B;

2336:   PetscFunctionBegin;
2337:   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338:   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

2340:   /* Create B=Bt^T that uses Bt's data structure */
2341:   PetscCall(MatCreateTranspose(Bt, &B));

2343:   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344:   PetscCall(MatDestroy(&B));
2345:   PetscFunctionReturn(PETSC_SUCCESS);
2346: }
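/*
  Usage sketch (not taken from this file; F is a MUMPS factor and X a dense matrix with a matching layout): since
  MUMPS wants sparse right-hand sides in compressed-column form, the right-hand sides are supplied as the rows of
  an AIJ matrix Bt and passed through the routine above.

    Mat Bt, X; // Bt holds B^T, i.e. one right-hand side per row
    PetscCall(MatMatTransposeSolve(F, Bt, X));
*/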

2348: #if !defined(PETSC_USE_COMPLEX)
2349: /*
2350:   input:
2351:    F:        numeric factor
2352:   output:
2353:    nneg:     total number of negative pivots
2354:    nzero:    total number of zero pivots
2355:    npos:     (global dimension of F) - nneg - nzero
2356: */
2357: static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358: {
2359:   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360:   PetscMPIInt size;

2362:   PetscFunctionBegin;
2363:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365:   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));

2367:   if (nneg) *nneg = mumps->id.INFOG(12);
2368:   if (nzero || npos) {
2369:     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2370:     if (nzero) *nzero = mumps->id.INFOG(28);
2371:     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372:   }
2373:   PetscFunctionReturn(PETSC_SUCCESS);
2374: }
2375: #endif

2377: static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2378: {
2379:   PetscMPIInt    nreqs;
2380:   PetscMUMPSInt *irn, *jcn;
2381:   PetscMPIInt    count;
2382:   PetscCount     totnnz, remain;
2383:   const PetscInt osize = mumps->omp_comm_size;
2384:   PetscScalar   *val;

2386:   PetscFunctionBegin;
2387:   if (osize > 1) {
2388:     if (reuse == MAT_INITIAL_MATRIX) {
2389:       /* master first gathers counts of nonzeros to receive */
2390:       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2391:       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

2393:       /* Then each computes number of send/recvs */
2394:       if (mumps->is_omp_master) {
2395:         /* Start from 1 since self communication is not done in MPI */
2396:         nreqs = 0;
2397:         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2398:       } else {
2399:         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2400:       }
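      /* i.e., each sender posts ceil(nnz/PETSC_MPI_INT_MAX) requests per array, and the master sums that count
         over all non-master ranks it receives from */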
2401:       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

2403:       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
2404:          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
2405:          might be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
2406:          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2407:        */
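      /* Equivalent slicing logic, shown schematically (illustration only; the loops below interleave the
         three arrays irn/jcn/val and track the offsets with count/remain):
           for (PetscCount off = 0; off < nnz; off += PETSC_MPI_INT_MAX) {
             PetscMPIInt len = (PetscMPIInt)PetscMin(nnz - off, (PetscCount)PETSC_MPI_INT_MAX);
             // post one Isend (or the matching Irecv on the master) of len entries starting at offset off
           }
         so every individual message length fits in a PetscMPIInt. */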
2408:       nreqs = 0; /* counter for actual send/recvs */
2409:       if (mumps->is_omp_master) {
2410:         totnnz = 0;

2412:         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2413:         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2414:         PetscCall(PetscMalloc1(totnnz, &val));

2416:         /* Self communication */
2417:         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2418:         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2419:         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

2421:         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2422:         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2423:         PetscCall(PetscFree(mumps->val_alloc));
2424:         mumps->nnz = totnnz;
2425:         mumps->irn = irn;
2426:         mumps->jcn = jcn;
2427:         mumps->val = mumps->val_alloc = val;

2429:         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2430:         jcn += mumps->recvcount[0];
2431:         val += mumps->recvcount[0];

2433:         /* Remote communication */
2434:         for (PetscMPIInt i = 1; i < osize; i++) {
2435:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2436:           remain = mumps->recvcount[i] - count;
2437:           while (count > 0) {
2438:             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439:             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2440:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2441:             irn += count;
2442:             jcn += count;
2443:             val += count;
2444:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2445:             remain -= count;
2446:           }
2447:         }
2448:       } else {
2449:         irn    = mumps->irn;
2450:         jcn    = mumps->jcn;
2451:         val    = mumps->val;
2452:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453:         remain = mumps->nnz - count;
2454:         while (count > 0) {
2455:           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456:           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2457:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2458:           irn += count;
2459:           jcn += count;
2460:           val += count;
2461:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2462:           remain -= count;
2463:         }
2464:       }
2465:     } else {
2466:       nreqs = 0;
2467:       if (mumps->is_omp_master) {
2468:         val = mumps->val + mumps->recvcount[0];
2469:         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2470:           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2471:           remain = mumps->recvcount[i] - count;
2472:           while (count > 0) {
2473:             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2474:             val += count;
2475:             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2476:             remain -= count;
2477:           }
2478:         }
2479:       } else {
2480:         val    = mumps->val;
2481:         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2482:         remain = mumps->nnz - count;
2483:         while (count > 0) {
2484:           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2485:           val += count;
2486:           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2487:           remain -= count;
2488:         }
2489:       }
2490:     }
2491:     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2492:     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
2493:   }
2494:   PetscFunctionReturn(PETSC_SUCCESS);
2495: }

2497: static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498: {
2499:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2501:   PetscFunctionBegin;
2502:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 corresponds to an empty matrix and is not treated as an error here */
2503:     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504:     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505:     PetscFunctionReturn(PETSC_SUCCESS);
2506:   }

2508:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

2511:   /* numerical factorization phase */
2512:   mumps->id.job = JOB_FACTNUMERIC;
2513:   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514:     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515:   } else {
2516:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517:   }

2519:   if (F->schur) {
2520:     const PetscScalar *array;
2521:     MUMPS_INT          size = mumps->id.size_schur;
2522:     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524:     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525:   }

2527:   PetscMUMPS_c(mumps);
2528:   if (mumps->id.INFOG(1) < 0) {
2529:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2530:     if (mumps->id.INFOG(1) == -10) {
2531:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532:       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2533:     } else if (mumps->id.INFOG(1) == -13) {
2534:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2535:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2536:     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2537:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2538:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2539:     } else {
2540:       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2541:       F->factorerrortype = MAT_FACTOR_OTHER;
2542:     }
2543:   }
2544:   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));

2546:   F->assembled = PETSC_TRUE;

2548:   if (F->schur) { /* reset Schur status to unfactored */
2549: #if defined(PETSC_HAVE_CUDA)
2550:     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2551: #endif
2552:     PetscScalar *array;
2553:     PetscCall(MatDenseGetArray(F->schur, &array));
2554:     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2555:     PetscCall(MatDenseRestoreArray(F->schur, &array));
2556:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2557:       mumps->id.ICNTL(19) = 2;
2558:       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2559:     }
2560:     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2561:   }

2563:   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2564:   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

2566:   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2567:   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2568:   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2569:   // which the solution (in SOL_loc) is available on the local processor.
2570:   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2571:   // normally have the same contents for each of these calls. The only exception is the case of
2572:   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2573:   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
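  // Concretely (an illustration of the userguide text above): after a solve, for i = 1, ..., lsol_loc the value
  // of the global unknown ISOL_loc(i) is returned in SOL_loc(i); since ISOL_loc is 1-based, the interface must
  // map it back to PETSc's 0-based global ordering when scattering the distributed solution.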
2574:   if (mumps->petsc_size > 1) {
2575:     PetscInt     lsol_loc;
2576:     PetscScalar *array;

2578:     /* distributed solution; Create x_seq=sol_loc for repeated use */
2579:     if (mumps->x_seq) {
2580:       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2581:       PetscCall(PetscFree(mumps->id.isol_loc));
2582:       PetscCall(VecDestroy(&mumps->x_seq));
2583:     }
2584:     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2585:     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2586:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2587:     PetscCall(VecGetArray(mumps->x_seq, &array));
2588:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2589:     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2590:     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2591:   }
2592:   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2593:   PetscFunctionReturn(PETSC_SUCCESS);
2594: }

2596: /* Sets MUMPS options from the options database */
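/*
  For example (an illustrative, not prescriptive, combination of the options registered below), a user may run with
      -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30 -mat_mumps_cntl_1 0.01
  and every -mat_mumps_icntl_* / -mat_mumps_cntl_* option handled in this routine follows the same pattern.
*/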
2597: static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2598: {
2599:   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2600:   PetscReal     cntl;
2601:   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2602:   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2603:   PetscBool     flg   = PETSC_FALSE;
2604:   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2605:   void         *arr;

2607:   PetscFunctionBegin;
2608:   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2609:   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2610:     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2611:     PetscInt       nthreads   = 0;
2612:     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2613:     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2614:     PetscMUMPSInt  nblk, *blkvar, *blkptr;

2616:     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2617:     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2618:     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

2620:     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2621:     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2622:     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2623:     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2624:     if (mumps->use_petsc_omp_support) {
2625:       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2626: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2627:       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2628:       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2629: #else
2630:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2631:               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2632: #endif
2633:     } else {
2634:       mumps->omp_comm      = PETSC_COMM_SELF;
2635:       mumps->mumps_comm    = mumps->petsc_comm;
2636:       mumps->is_omp_master = PETSC_TRUE;
2637:     }
2638:     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2639:     mumps->reqs = NULL;
2640:     mumps->tag  = 0;

2642:     if (mumps->mumps_comm != MPI_COMM_NULL) {
2643:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2644:         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2645:         MPI_Comm comm;
2646:         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2647:         mumps->mumps_comm = comm;
2648:       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2649:     }

2651:     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2652:     mumps->id.job          = JOB_INIT;
2653:     mumps->id.par          = 1; /* host participates in factorization and solve */
2654:     mumps->id.sym          = mumps->sym;

2656:     size          = mumps->id.size_schur;
2657:     arr           = mumps->id.schur;
2658:     listvar_schur = mumps->id.listvar_schur;
2659:     nblk          = mumps->id.nblk;
2660:     blkvar        = mumps->id.blkvar;
2661:     blkptr        = mumps->id.blkptr;
2662:     if (PetscDefined(USE_DEBUG)) {
2663:       for (PetscInt i = 0; i < size; i++)
2664:         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2665:                    A->rmap->N);
2666:     }

2668:     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2669:     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2670:     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is required because the requested precision differs from that of PetscScalar");
2671:     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));

2673:     PetscMUMPS_c(mumps);
2674:     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

2676:     /* set PETSc-MUMPS default options - override MUMPS default */
2677:     mumps->id.ICNTL(3) = 0;
2678:     mumps->id.ICNTL(4) = 0;
2679:     if (mumps->petsc_size == 1) {
2680:       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2681:       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2682:     } else {
2683:       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2684:       mumps->id.ICNTL(21) = 1; /* distributed solution */
2685:     }
2686:     if (nblk && blkptr) {
2687:       mumps->id.ICNTL(15) = 1;
2688:       mumps->id.nblk      = nblk;
2689:       mumps->id.blkvar    = blkvar;
2690:       mumps->id.blkptr    = blkptr;
2691:     } else mumps->id.ICNTL(15) = 0;
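    /* Illustration of the block format assumed by ICNTL(15)=1 (inferred from the construction used for variable
       block sizes further below): blkptr is a 1-based prefix sum of the block sizes, so blocks of sizes {2,3,1}
       correspond to nblk = 3 and blkptr = {1,3,6,7}. */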

2693:     /* restore cached ICNTL and CNTL values */
2694:     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2695:     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);

2697:     PetscCall(PetscFree(mumps->ICNTL_pre));
2698:     PetscCall(PetscFree(mumps->CNTL_pre));

2700:     if (schur) {
2701:       mumps->id.size_schur    = size;
2702:       mumps->id.schur_lld     = size;
2703:       mumps->id.schur         = arr;
2704:       mumps->id.listvar_schur = listvar_schur;
2705:       if (mumps->petsc_size > 1) {
2706:         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

2708:         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2709:         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2710:         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2711:         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2712:       } else {
2713:         if (F->factortype == MAT_FACTOR_LU) {
2714:           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2715:         } else {
2716:           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2717:         }
2718:       }
2719:       mumps->id.ICNTL(26) = -1;
2720:     }

2722:     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2723:        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2724:      */
2725:     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2726:     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));

2728:     mumps->scat_rhs = NULL;
2729:     mumps->scat_sol = NULL;
2730:   }
2731:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2732:   if (flg) mumps->id.ICNTL(1) = icntl;
2733:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2734:   if (flg) mumps->id.ICNTL(2) = icntl;
2735:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2736:   if (flg) mumps->id.ICNTL(3) = icntl;

2738:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2739:   if (flg) mumps->id.ICNTL(4) = icntl;
2740:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

2742:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2743:   if (flg) mumps->id.ICNTL(6) = icntl;

2745:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2746:   if (flg) {
2747:     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2748:     mumps->id.ICNTL(7) = icntl;
2749:   }

2751:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2752:   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2753:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2754:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2755:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2756:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2757:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2758:   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2759:   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2760:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2761:   if (flg) {
2762:     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The absolute value of a negative -mat_mumps_icntl_15 must be a multiple of the row and column block sizes");
2763:     else if (mumps->id.ICNTL(15) > 0) {
2764:       const PetscInt *bsizes;
2765:       PetscInt        nblocks, p, *blkptr = NULL;
2766:       PetscMPIInt    *recvcounts, *displs, n;
2767:       PetscMPIInt     rank, size = 0;

2769:       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2770:       flg = PETSC_TRUE;
2771:       for (p = 0; p < nblocks; ++p) {
2772:         if (bsizes[p] > 1) break;
2773:       }
2774:       if (p == nblocks) flg = PETSC_FALSE;
2775:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2776:       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2777:         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2778:         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2779:         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2780:         PetscCall(PetscMPIIntCast(nblocks, &n));
2781:         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2782:         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2783:         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2784:         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2785:         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2786:         if (rank == 0) {
2787:           blkptr[0] = 1;
2788:           for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p]; /* prefix-sum over all gathered block sizes, not just the local ones */
2789:           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2790:         }
2791:         PetscCall(PetscFree2(recvcounts, displs));
2792:         PetscCall(PetscFree(blkptr));
2793:       }
2794:     }
2795:   }
2796:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2797:   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2798:     PetscCall(MatDestroy(&F->schur));
2799:     PetscCall(MatMumpsResetSchur_Private(mumps));
2800:   }

2802:   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2803:      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2804:      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2805:      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2806:      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2807:      In short, we could not use distributed RHS until MPICH v4.0b1, or until the workaround introduced in MUMPS 5.6.2+ was enabled.
2808:    */
2809:   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2810: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2811:   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2812: #endif
2813:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2814:   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2815: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2816:   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2817: #endif
2818:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

2820:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2821:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2822:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2823:   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */

2825:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2826:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2827:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2828:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2829:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2830:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2831:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2832:   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2833:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2834:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2835:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2836:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2837:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2838:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2839:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2840:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2841:   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

2843:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2844:   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2845:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2846:   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2847:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2848:   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2849:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2850:   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2851:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2852:   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2853:   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2854:   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);

2856:   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));

2858:   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2859:   if (ninfo) {
2860:     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2861:     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2862:     mumps->ninfo = ninfo;
2863:     for (i = 0; i < ninfo; i++) {
2864:       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2865:       mumps->info[i] = info[i];
2866:     }
2867:   }
2868:   PetscOptionsEnd();
2869:   PetscFunctionReturn(PETSC_SUCCESS);
2870: }

2872: static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2873: {
2874:   PetscFunctionBegin;
2875:   if (mumps->id.INFOG(1) < 0) {
2876:     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2877:     if (mumps->id.INFOG(1) == -6) {
2878:       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2879:       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2880:     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2881:       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2882:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2883:     } else {
2884:       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885:       F->factorerrortype = MAT_FACTOR_OTHER;
2886:     }
2887:   }
2888:   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2889:   PetscFunctionReturn(PETSC_SUCCESS);
2890: }

2892: static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2893: {
2894:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2895:   Vec            b;
2896:   const PetscInt M = A->rmap->N;

2898:   PetscFunctionBegin;
2899:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2900:     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2901:     PetscFunctionReturn(PETSC_SUCCESS);
2902:   }

2904:   /* Set MUMPS options from the options database */
2905:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2907:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2908:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2910:   /* analysis phase */
2911:   mumps->id.job = JOB_FACTSYMBOLIC;
2912:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2913:   switch (mumps->id.ICNTL(18)) {
2914:   case 0: /* centralized assembled matrix input */
2915:     if (!mumps->myid) {
2916:       mumps->id.nnz = mumps->nnz;
2917:       mumps->id.irn = mumps->irn;
2918:       mumps->id.jcn = mumps->jcn;
2919:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2920:       if (r && mumps->id.ICNTL(7) == 7) {
2921:         mumps->id.ICNTL(7) = 1;
2922:         if (!mumps->myid) {
2923:           const PetscInt *idx;
2924:           PetscInt        i;

2926:           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2927:           PetscCall(ISGetIndices(r, &idx));
2928:           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2929:           PetscCall(ISRestoreIndices(r, &idx));
2930:         }
2931:       }
2932:     }
2933:     break;
2934:   case 3: /* distributed assembled matrix input (size>1) */
2935:     mumps->id.nnz_loc = mumps->nnz;
2936:     mumps->id.irn_loc = mumps->irn;
2937:     mumps->id.jcn_loc = mumps->jcn;
2938:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2939:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2940:       PetscCall(MatCreateVecs(A, NULL, &b));
2941:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2942:       PetscCall(VecDestroy(&b));
2943:     }
2944:     break;
2945:   }
2946:   PetscMUMPS_c(mumps);
2947:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

2949:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2950:   F->ops->solve             = MatSolve_MUMPS;
2951:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2952:   F->ops->matsolve          = MatMatSolve_MUMPS;
2953:   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2954:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2956:   mumps->matstruc = SAME_NONZERO_PATTERN;
2957:   PetscFunctionReturn(PETSC_SUCCESS);
2958: }

2960: /* Note the PETSc r and c permutations are ignored */
2961: static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2962: {
2963:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2964:   Vec            b;
2965:   const PetscInt M = A->rmap->N;

2967:   PetscFunctionBegin;
2968:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2969:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2970:     PetscFunctionReturn(PETSC_SUCCESS);
2971:   }

2973:   /* Set MUMPS options from the options database */
2974:   PetscCall(MatSetFromOptions_MUMPS(F, A));

2976:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2977:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

2979:   /* analysis phase */
2980:   mumps->id.job = JOB_FACTSYMBOLIC;
2981:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2982:   switch (mumps->id.ICNTL(18)) {
2983:   case 0: /* centralized assembled matrix input */
2984:     if (!mumps->myid) {
2985:       mumps->id.nnz = mumps->nnz;
2986:       mumps->id.irn = mumps->irn;
2987:       mumps->id.jcn = mumps->jcn;
2988:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2989:     }
2990:     break;
2991:   case 3: /* distributed assembled matrix input (size>1) */
2992:     mumps->id.nnz_loc = mumps->nnz;
2993:     mumps->id.irn_loc = mumps->irn;
2994:     mumps->id.jcn_loc = mumps->jcn;
2995:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2996:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2997:       PetscCall(MatCreateVecs(A, NULL, &b));
2998:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2999:       PetscCall(VecDestroy(&b));
3000:     }
3001:     break;
3002:   }
3003:   PetscMUMPS_c(mumps);
3004:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

3006:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3007:   F->ops->solve             = MatSolve_MUMPS;
3008:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3009:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

3011:   mumps->matstruc = SAME_NONZERO_PATTERN;
3012:   PetscFunctionReturn(PETSC_SUCCESS);
3013: }

3015: /* Note the PETSc r permutation and factor info are ignored */
3016: static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3017: {
3018:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3019:   Vec            b;
3020:   const PetscInt M = A->rmap->N;

3022:   PetscFunctionBegin;
3023:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3024:     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3025:     PetscFunctionReturn(PETSC_SUCCESS);
3026:   }

3028:   /* Set MUMPS options from the options database */
3029:   PetscCall(MatSetFromOptions_MUMPS(F, A));

3031:   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3032:   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

3034:   /* analysis phase */
3035:   mumps->id.job = JOB_FACTSYMBOLIC;
3036:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3037:   switch (mumps->id.ICNTL(18)) {
3038:   case 0: /* centralized assembled matrix input */
3039:     if (!mumps->myid) {
3040:       mumps->id.nnz = mumps->nnz;
3041:       mumps->id.irn = mumps->irn;
3042:       mumps->id.jcn = mumps->jcn;
3043:       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3044:     }
3045:     break;
3046:   case 3: /* distributed assembled matrix input (size>1) */
3047:     mumps->id.nnz_loc = mumps->nnz;
3048:     mumps->id.irn_loc = mumps->irn;
3049:     mumps->id.jcn_loc = mumps->jcn;
3050:     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3051:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3052:       PetscCall(MatCreateVecs(A, NULL, &b));
3053:       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3054:       PetscCall(VecDestroy(&b));
3055:     }
3056:     break;
3057:   }
3058:   PetscMUMPS_c(mumps);
3059:   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

3061:   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3062:   F->ops->solve                 = MatSolve_MUMPS;
3063:   F->ops->solvetranspose        = MatSolve_MUMPS;
3064:   F->ops->matsolve              = MatMatSolve_MUMPS;
3065:   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3066:   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3067: #if defined(PETSC_USE_COMPLEX)
3068:   F->ops->getinertia = NULL;
3069: #else
3070:   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3071: #endif

3073:   mumps->matstruc = SAME_NONZERO_PATTERN;
3074:   PetscFunctionReturn(PETSC_SUCCESS);
3075: }

3077: static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3078: {
3079:   PetscBool         isascii;
3080:   PetscViewerFormat format;
3081:   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

3083:   PetscFunctionBegin;
3084:   /* check if matrix is mumps type */
3085:   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

3087:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3088:   if (isascii) {
3089:     PetscCall(PetscViewerGetFormat(viewer, &format));
3090:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3091:       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3092:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3093:         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3094:         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3095:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3096:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3097:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3098:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3099:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3100:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3101:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3102:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3103:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3104:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3105:         if (mumps->id.ICNTL(11) > 0) {
3106:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3107:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3108:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3109:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3110:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3111:           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3112:         }
3113:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3114:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3115:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3116:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3117:         /* ICNTL(16) and ICNTL(17) not used */
3118:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3119:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3120:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3121:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3122:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3123:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23)));

3125:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3126:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3127:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3128:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3129:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3130:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

3132:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3133:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (which factors may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
3134:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3135:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3136:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3137:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3138:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3139:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3140:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3141:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3142:         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

3144:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3145:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3146:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3147:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3148:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3149:         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

3151:         /* information local to each processor */
3152:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3153:         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3154:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3155:         PetscCall(PetscViewerFlush(viewer));
3156:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3157:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3158:         PetscCall(PetscViewerFlush(viewer));
3159:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3160:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3161:         PetscCall(PetscViewerFlush(viewer));

3163:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data needed for numerical factorization):\n"));
3164:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3165:         PetscCall(PetscViewerFlush(viewer));

3167:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
3168:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3169:         PetscCall(PetscViewerFlush(viewer));

3171:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3172:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3173:         PetscCall(PetscViewerFlush(viewer));

3175:         if (mumps->ninfo && mumps->ninfo <= 80) {
3176:           PetscInt i;
3177:           for (i = 0; i < mumps->ninfo; i++) {
3178:             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3179:             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3180:             PetscCall(PetscViewerFlush(viewer));
3181:           }
3182:         }
3183:         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3184:       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

3186:       if (mumps->myid == 0) { /* information from the host */
3187:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3188:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3189:         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3190:         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

3192:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3193:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3194:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3195:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3196:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3197:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3198:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3199:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3200:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3201:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3202:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3203:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3204:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3205:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3206:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3207:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3208:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3209:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3210:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3211:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3212:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3213:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3214:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3215:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3216:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3217:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3218:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3219:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3220:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3221:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3222:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3223:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3224:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3225:         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3226:       }
3227:     }
3228:   }
3229:   PetscFunctionReturn(PETSC_SUCCESS);
3230: }

3232: static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3233: {
3234:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

3236:   PetscFunctionBegin;
3237:   info->block_size        = 1.0;
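  /* MUMPS reports a negative INFOG(20) when the number of factor entries is given in millions; convert it back to an approximate entry count */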
3238:   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3239:   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3240:   info->nz_unneeded       = 0.0;
3241:   info->assemblies        = 0.0;
3242:   info->mallocs           = 0.0;
3243:   info->memory            = 0.0;
3244:   info->fill_ratio_given  = 0;
3245:   info->fill_ratio_needed = 0;
3246:   info->factor_mallocs    = 0;
3247:   PetscFunctionReturn(PETSC_SUCCESS);
3248: }

3250: static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3251: {
3252:   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3253:   const PetscScalar *arr;
3254:   const PetscInt    *idxs;
3255:   PetscInt           size, i;

3257:   PetscFunctionBegin;
3258:   PetscCall(ISGetLocalSize(is, &size));
3259:   /* Schur complement matrix */
3260:   PetscCall(MatDestroy(&F->schur));
3261:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3262:   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3263:   // don't allocate mumps->id.schur[] now as its precision is not yet known
3264:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3265:   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3266:   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3267:   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

3269:   /* MUMPS expects Fortran style indices */
3270:   PetscCall(PetscFree(mumps->id.listvar_schur));
3271:   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3272:   PetscCall(ISGetIndices(is, &idxs));
3273:   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3274:   PetscCall(ISRestoreIndices(is, &idxs));
3275:   /* set a special value of ICNTL(26) (not handled by MUMPS) to be used in the solve phase by PETSc */
3276:   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3277:   else mumps->ICNTL26 = -1;
3278:   PetscFunctionReturn(PETSC_SUCCESS);
3279: }

3281: static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3282: {
3283:   Mat          St;
3284:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3285:   PetscScalar *array;
3286:   PetscInt     i, j, N = mumps->id.size_schur;

3288:   PetscFunctionBegin;
3289:   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3290:   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3291:   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3292:   PetscCall(MatSetType(St, MATDENSE));
3293:   PetscCall(MatSetUp(St));
3294:   PetscCall(MatDenseGetArray(St, &array));
3295:   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3296:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3297:       for (i = 0; i < N; i++) {
3298:         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3299:       }
3300:     } else { /* stored by columns */
3301:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3302:     }
3303:   } else {                          /* either full or lower-triangular (not packed) */
3304:     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3305:       for (i = 0; i < N; i++) {
3306:         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3307:       }
3308:     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3309:       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3310:     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3311:       for (i = 0; i < N; i++) {
3312:         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3313:       }
3314:     }
3315:   }
3316:   PetscCall(MatDenseRestoreArray(St, &array));
3317:   *S = St;
3318:   PetscFunctionReturn(PETSC_SUCCESS);
3319: }

3321: static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3322: {
3323:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3325:   PetscFunctionBegin;
3326:   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
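    /* ICNTL_pre layout: ICNTL_pre[0] holds the number of cached pairs, followed by (icntl, ival) pairs */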
3327:     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3328:     for (i = 0; i < nICNTL_pre; ++i)
3329:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3330:     if (i == nICNTL_pre) {                             /* not already cached */
3331:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3332:       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3333:       mumps->ICNTL_pre[0]++;
3334:     }
3335:     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3336:     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3337:   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3338:   PetscFunctionReturn(PETSC_SUCCESS);
3339: }

3341: static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3342: {
3343:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3345:   PetscFunctionBegin;
3346:   if (mumps->id.job == JOB_NULL) {
3347:     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3348:     *ival = 0;
3349:     for (i = 0; i < nICNTL_pre; ++i) {
3350:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3351:     }
3352:   } else *ival = mumps->id.ICNTL(icntl);
3353:   PetscFunctionReturn(PETSC_SUCCESS);
3354: }

3356: /*@
3357:   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3359:   Logically Collective

3361:   Input Parameters:
3362: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3363: . icntl - index of MUMPS parameter array `ICNTL()`
3364: - ival  - value of MUMPS `ICNTL(icntl)`

3366:   Options Database Key:
3367: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

3369:   Level: beginner

3371:   Note:
3372:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
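
  Example Usage:
  A minimal sketch (assumes `ksp` is a `KSP` whose operators have been set; the value 30 for ICNTL(14) is only illustrative)
.vb
  PC  pc;
  Mat F;
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCLU));
  PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
  PetscCall(PCFactorSetUpMatSolverType(pc)); /* create the MUMPS factor matrix so it can be configured */
  PetscCall(PCFactorGetMatrix(pc, &F));
  PetscCall(MatMumpsSetIcntl(F, 14, 30)); /* ICNTL(14): percentage increase in the estimated working space */
.ve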

3374: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3375: @*/
3376: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3377: {
3378:   PetscFunctionBegin;
3380:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3383:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3384:   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3385:   PetscFunctionReturn(PETSC_SUCCESS);
3386: }

3388: /*@
3389:   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

3391:   Logically Collective

3393:   Input Parameters:
3394: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3395: - icntl - index of MUMPS parameter array ICNTL()

3397:   Output Parameter:
3398: . ival - value of MUMPS ICNTL(icntl)

3400:   Level: beginner
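
  Example Usage:
  A small sketch (assumes `F` was obtained as described above)
.vb
  PetscInt ival;
  PetscCall(MatMumpsGetIcntl(F, 24, &ival)); /* ICNTL(24): is null pivot detection enabled? */
.ve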

3402: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3403: @*/
3404: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3405: {
3406:   PetscFunctionBegin;
3408:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3410:   PetscAssertPointer(ival, 3);
3411:   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3412:   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3413:   PetscFunctionReturn(PETSC_SUCCESS);
3414: }

3416: static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3417: {
3418:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3420:   PetscFunctionBegin;
3421:   if (mumps->id.job == JOB_NULL) {
3422:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3423:     for (i = 0; i < nCNTL_pre; ++i)
3424:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3425:     if (i == nCNTL_pre) {
3426:       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3427:       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3428:       mumps->CNTL_pre[0]++;
3429:     }
3430:     mumps->CNTL_pre[1 + 2 * i] = icntl;
3431:     mumps->CNTL_pre[2 + 2 * i] = val;
3432:   } else ID_CNTL_SET(mumps->id, icntl, val);
3433:   PetscFunctionReturn(PETSC_SUCCESS);
3434: }

3436: static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3437: {
3438:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3440:   PetscFunctionBegin;
3441:   if (mumps->id.job == JOB_NULL) {
3442:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3443:     *val = 0.0;
3444:     for (i = 0; i < nCNTL_pre; ++i) {
3445:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3446:     }
3447:   } else *val = ID_CNTL_GET(mumps->id, icntl);
3448:   PetscFunctionReturn(PETSC_SUCCESS);
3449: }

3451: /*@
3452:   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3454:   Logically Collective

3456:   Input Parameters:
3457: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3458: . icntl - index of MUMPS parameter array `CNTL()`
3459: - val   - value of MUMPS `CNTL(icntl)`

3461:   Options Database Key:
3462: . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`

3464:   Level: beginner

3466:   Note:
3467:   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
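
  Example Usage:
  A minimal sketch (assumes `F` was obtained from `MatGetFactor()` or `PCFactorGetMatrix()`; the value 0.01 is only illustrative)
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.01)); /* CNTL(1): relative pivoting threshold */
.ve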

3469: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3470: @*/
3471: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3472: {
3473:   PetscFunctionBegin;
3475:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3478:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3479:   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3480:   PetscFunctionReturn(PETSC_SUCCESS);
3481: }

3483: /*@
3484:   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

3486:   Logically Collective

3488:   Input Parameters:
3489: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3490: - icntl - index of MUMPS parameter array CNTL()

3492:   Output Parameter:
3493: . val - value of MUMPS CNTL(icntl)

3495:   Level: beginner

3497: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3498: @*/
3499: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3500: {
3501:   PetscFunctionBegin;
3503:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3505:   PetscAssertPointer(val, 3);
3506:   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3507:   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3508:   PetscFunctionReturn(PETSC_SUCCESS);
3509: }

3511: static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3512: {
3513:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3515:   PetscFunctionBegin;
3516:   *info = mumps->id.INFO(icntl);
3517:   PetscFunctionReturn(PETSC_SUCCESS);
3518: }

3520: static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3521: {
3522:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3524:   PetscFunctionBegin;
3525:   *infog = mumps->id.INFOG(icntl);
3526:   PetscFunctionReturn(PETSC_SUCCESS);
3527: }

3529: static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3530: {
3531:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3533:   PetscFunctionBegin;
3534:   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3535:   PetscFunctionReturn(PETSC_SUCCESS);
3536: }

3538: static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3539: {
3540:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3542:   PetscFunctionBegin;
3543:   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3544:   PetscFunctionReturn(PETSC_SUCCESS);
3545: }

3547: static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3548: {
3549:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3551:   PetscFunctionBegin;
3552:   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3553:   *size  = 0;
3554:   *array = NULL;
3555:   if (!mumps->myid) {
3556:     *size = mumps->id.INFOG(28);
3557:     PetscCall(PetscMalloc1(*size, array));
3558:     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3559:   }
3560:   PetscFunctionReturn(PETSC_SUCCESS);
3561: }

3563: static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3564: {
3565:   Mat          Bt = NULL, Btseq = NULL;
3566:   PetscBool    flg;
3567:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3568:   PetscScalar *aa;
3569:   PetscInt     spnr, *ia, *ja, M, nrhs;

3571:   PetscFunctionBegin;
3572:   PetscAssertPointer(spRHS, 2);
3573:   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3574:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3575:   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3576:   PetscCall(MatTransposeGetMat(spRHS, &Bt));

3578:   PetscCall(MatMumpsSetIcntl(F, 30, 1));

3580:   if (mumps->petsc_size > 1) {
3581:     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3582:     Btseq         = b->A;
3583:   } else {
3584:     Btseq = Bt;
3585:   }

3587:   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3588:   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3589:   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3590:   mumps->id.rhs = NULL;

3592:   if (!mumps->myid) {
3593:     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3594:     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3595:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3596:     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3597:     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3598:   } else {
3599:     mumps->id.irhs_ptr    = NULL;
3600:     mumps->id.irhs_sparse = NULL;
3601:     mumps->id.nz_rhs      = 0;
3602:     if (mumps->id.rhs_sparse_len) {
3603:       PetscCall(PetscFree(mumps->id.rhs_sparse));
3604:       mumps->id.rhs_sparse_len = 0;
3605:     }
3606:   }
3607:   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3608:   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

3610:   /* solve phase */
3611:   mumps->id.job = JOB_SOLVE;
3612:   PetscMUMPS_c(mumps);
3613:   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

3615:   if (!mumps->myid) {
3616:     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3617:     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3618:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3619:   }
3620:   PetscFunctionReturn(PETSC_SUCCESS);
3621: }

3623: /*@
3624:   MatMumpsGetInverse - Get a user-specified set of entries of the inverse of `A` <https://mumps-solver.org/index.php?page=doc>

3626:   Logically Collective

3628:   Input Parameter:
3629: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3631:   Output Parameter:
3632: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the requested entries of the inverse of `A`

3634:   Level: beginner
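
  Example Usage:
  A sequential sketch requesting the diagonal of the inverse (assumes `F` is a MUMPS factor of an `n` x `n` matrix on one MPI rank; the nonzero
  pattern of the hypothetical matrix `spRHST` selects the requested entries and its values are overwritten on return)
.vb
  Mat spRHST, spRHS;
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 1, NULL, &spRHST));
  for (PetscInt i = 0; i < n; i++) PetscCall(MatSetValue(spRHST, i, i, 1.0, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatCreateTranspose(spRHST, &spRHS)); /* wrap as MATTRANSPOSEVIRTUAL, as required */
  PetscCall(MatMumpsGetInverse(F, spRHS));       /* spRHST now holds the requested entries of the inverse */
  PetscCall(MatDestroy(&spRHS));
  PetscCall(MatDestroy(&spRHST));
.ve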

3636: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3637: @*/
3638: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3639: {
3640:   PetscFunctionBegin;
3642:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3643:   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3644:   PetscFunctionReturn(PETSC_SUCCESS);
3645: }

3647: static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3648: {
3649:   Mat spRHS;

3651:   PetscFunctionBegin;
3652:   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3653:   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3654:   PetscCall(MatDestroy(&spRHS));
3655:   PetscFunctionReturn(PETSC_SUCCESS);
3656: }

3658: /*@
3659:   MatMumpsGetInverseTranspose - Get a user-specified set of entries of the inverse of the matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>

3661:   Logically Collective

3663:   Input Parameter:
3664: . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3666:   Output Parameter:
3667: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$

3669:   Level: beginner

3671: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3672: @*/
3673: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3674: {
3675:   PetscBool flg;

3677:   PetscFunctionBegin;
3679:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3680:   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3681:   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3682:   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3683:   PetscFunctionReturn(PETSC_SUCCESS);
3684: }

3686: static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3687: {
3688:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

3690:   PetscFunctionBegin;
3691:   if (nblk) {
3692:     PetscAssertPointer(blkptr, 4);
3693:     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3694:     PetscCall(PetscFree(mumps->id.blkptr));
3695:     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3696:     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3697:     // mumps->id.icntl[] might not have been allocated yet; that is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15) here.
3698:     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3699:     // mumps->id.ICNTL(15) = 1;
3700:     if (blkvar) {
3701:       PetscCall(PetscFree(mumps->id.blkvar));
3702:       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3703:       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3704:     }
3705:   } else {
3706:     PetscCall(PetscFree(mumps->id.blkptr));
3707:     PetscCall(PetscFree(mumps->id.blkvar));
3708:     // mumps->id.ICNTL(15) = 0;
3709:     mumps->id.nblk = 0;
3710:   }
3711:   PetscFunctionReturn(PETSC_SUCCESS);
3712: }

3714: /*@
3715:   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`

3717:   Not collective, only relevant on the first process of the MPI communicator

3719:   Input Parameters:
3720: + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3721: . nblk   - the number of blocks
3722: . blkvar - see the MUMPS documentation: `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk=1, nblk`) holds the variables associated with block `iblk`
3723: - blkptr - array of size `nblk + 1`, starting at 1, storing the prefix sum of the block sizes (see the example below)

3725:   Level: advanced
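
  Example Usage:
  A minimal sketch on the first MPI rank (a hypothetical 5 x 5 matrix split into a 2-variable block followed by a 3-variable block, with MUMPS 1-based numbering)
.vb
  PetscInt nblk      = 2;
  PetscInt blkptr[3] = {1, 3, 6};       /* block iblk holds blkvar(blkptr(iblk) : blkptr(iblk+1)-1) */
  PetscInt blkvar[5] = {1, 2, 3, 4, 5}; /* variables listed block by block */
  PetscCall(MatMumpsSetBlk(F, nblk, blkvar, blkptr));
.ve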

3727: .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3728: @*/
3729: PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3730: {
3731:   PetscFunctionBegin;
3733:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3734:   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3735:   PetscFunctionReturn(PETSC_SUCCESS);
3736: }

3738: /*@
3739:   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>

3741:   Logically Collective

3743:   Input Parameters:
3744: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3745: - icntl - index of MUMPS parameter array INFO()

3747:   Output Parameter:
3748: . ival - value of MUMPS INFO(icntl)

3750:   Level: beginner
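
  Example Usage:
  A small sketch (assumes `F` has been numerically factored)
.vb
  PetscInt mem;
  PetscCall(MatMumpsGetInfo(F, 16, &mem)); /* INFO(16): size (in MB) of MUMPS internal data used during factorization on this process */
.ve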

3752: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3753: @*/
3754: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3755: {
3756:   PetscFunctionBegin;
3758:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3759:   PetscAssertPointer(ival, 3);
3760:   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3761:   PetscFunctionReturn(PETSC_SUCCESS);
3762: }

3764: /*@
3765:   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

3767:   Logically Collective

3769:   Input Parameters:
3770: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3771: - icntl - index of MUMPS parameter array INFOG()

3773:   Output Parameter:
3774: . ival - value of MUMPS INFOG(icntl)

3776:   Level: beginner

3778: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3779: @*/
3780: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3781: {
3782:   PetscFunctionBegin;
3784:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3785:   PetscAssertPointer(ival, 3);
3786:   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3787:   PetscFunctionReturn(PETSC_SUCCESS);
3788: }

3790: /*@
3791:   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

3793:   Logically Collective

3795:   Input Parameters:
3796: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3797: - icntl - index of MUMPS parameter array RINFO()

3799:   Output Parameter:
3800: . val - value of MUMPS RINFO(icntl)

3802:   Level: beginner

3804: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3805: @*/
3806: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3807: {
3808:   PetscFunctionBegin;
3810:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3811:   PetscAssertPointer(val, 3);
3812:   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3813:   PetscFunctionReturn(PETSC_SUCCESS);
3814: }

3816: /*@
3817:   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

3819:   Logically Collective

3821:   Input Parameters:
3822: + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3823: - icntl - index of MUMPS parameter array RINFOG()

3825:   Output Parameter:
3826: . val - value of MUMPS RINFOG(icntl)

3828:   Level: beginner
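
  Example Usage:
  A sketch reconstructing the determinant after factorizing with `-mat_mumps_icntl_33 1` (real-valued build assumed; see RINFOG(12), RINFOG(13) and INFOG(34) in the MUMPS manual)
.vb
  PetscReal mantissa, det;
  PetscInt  exponent;
  PetscCall(MatMumpsGetRinfog(F, 12, &mantissa)); /* mantissa of the determinant */
  PetscCall(MatMumpsGetInfog(F, 34, &exponent));  /* base-2 exponent of the determinant */
  det = mantissa * PetscPowReal(2.0, (PetscReal)exponent);
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "det(A) = %g\n", (double)det));
.ve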

3830: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3831: @*/
3832: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3833: {
3834:   PetscFunctionBegin;
3836:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3837:   PetscAssertPointer(val, 3);
3838:   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3839:   PetscFunctionReturn(PETSC_SUCCESS);
3840: }

3842: /*@
3843:   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

3845:   Logically Collective

3847:   Input Parameter:
3848: . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

3850:   Output Parameters:
3851: + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3852: - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3853:           for freeing it with `PetscFree()`.

3855:   Level: beginner
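
  Example Usage:
  A sketch (requires factorization with `-mat_mumps_icntl_24 1`; the returned array is nonempty only on MPI rank 0 and must be freed by the caller)
.vb
  PetscInt  nnull, *rows;
  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));
  for (PetscInt i = 0; i < nnull; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "null pivot row %" PetscInt_FMT "\n", rows[i]));
  PetscCall(PetscFree(rows));
.ve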

3857: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3858: @*/
3859: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3860: {
3861:   PetscFunctionBegin;
3863:   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3864:   PetscAssertPointer(size, 2);
3865:   PetscAssertPointer(array, 3);
3866:   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3867:   PetscFunctionReturn(PETSC_SUCCESS);
3868: }

3870: /*MC
3871:   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3872:   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

3874:   Works with `MATAIJ` and `MATSBAIJ` matrices

3876:   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

3878:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
3879:   See details below.

3881:   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
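
  The same selection can be made from code, for example (a minimal sketch; assumes the `KSP` object `ksp` already exists)
.vb
  PC pc;
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCLU));
  PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
.ve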

3883:   Options Database Keys:
3884: +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3885: .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
3886: .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3887: .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3888: .  -mat_mumps_icntl_6  - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
3889: .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3890:                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3891: .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3892: .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of steps of iterative refinement
3893: .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3894: .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3895: .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3896: .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3897: .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3898: .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3899: .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3900: .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3901: .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
3902: .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3903: .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3904: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3905: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3906: .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3907: .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3908: .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3909: .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3910: .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3911: .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3912: .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3913: .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3914: .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3915: .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3916: .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3917: .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3918: .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3919: .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3920: .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3921: .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3922: .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3923: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3924:                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

3926:   Level: beginner

3928:   Notes:
3929:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3930:   error if the matrix is Hermitian.

3932:   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3933:   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

3935:   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3936:   the failure with
3937: .vb
3938:           KSPGetPC(ksp,&pc);
3939:           PCFactorGetMatrix(pc,&mat);
3940:           MatMumpsGetInfo(mat,....);
3941:           MatMumpsGetInfog(mat,....); etc.
3942: .ve
3943:   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.

3945:   MUMPS provides 64-bit integer support in two build modes:
3946:   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3947:   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

3949:   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3950:   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3951:   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3952:   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

3954:   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

3956:   Two modes to run MUMPS/PETSc with OpenMP
3957: .vb
3958:    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3959:    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3960: .ve

3962: .vb
3963:    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3964:    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3965: .ve

3967:    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3968:    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3969:    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3970:    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3971:    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

3973:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3974:    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3975:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3976:    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set
3977:    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3978:    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
3979:    If you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind
3980:    MPI ranks to cores, then with `-mat_mumps_use_omp_threads 16` a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
3981:    cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the
3982:    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3983:    For example, with the Slurm job scheduler, one can use `srun --cpu-bind=verbose -m block:block` to map consecutive MPI ranks to sockets and
3984:    examine the mapping result.

3986:    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3987:    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3988:    calls `omp_set_num_threads`(m) internally before calling MUMPS.

3990:    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

3992: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3993: M*/

3995: static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3996: {
3997:   PetscFunctionBegin;
3998:   *type = MATSOLVERMUMPS;
3999:   PetscFunctionReturn(PETSC_SUCCESS);
4000: }

4002: /* MatGetFactor for Seq and MPI AIJ matrices */
4003: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
4004: {
4005:   Mat         B;
4006:   Mat_MUMPS  *mumps;
4007:   PetscBool   isSeqAIJ, isDiag, isDense;
4008:   PetscMPIInt size;

4010:   PetscFunctionBegin;
4011: #if defined(PETSC_USE_COMPLEX)
4012:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4013:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4014:     *F = NULL;
4015:     PetscFunctionReturn(PETSC_SUCCESS);
4016:   }
4017: #endif
4018:   /* Create the factorization matrix */
4019:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4020:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4021:   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4022:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4023:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4024:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4025:   PetscCall(MatSetUp(B));

4027:   PetscCall(PetscNew(&mumps));

4029:   B->ops->view    = MatView_MUMPS;
4030:   B->ops->getinfo = MatGetInfo_MUMPS;

4032:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4033:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4034:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4035:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4036:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4037:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4038:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4039:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4040:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4041:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4042:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4043:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4044:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4045:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4046:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4048:   if (ftype == MAT_FACTOR_LU) {
4049:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4050:     B->factortype            = MAT_FACTOR_LU;
4051:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4052:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4053:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4054:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4055:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4056:     mumps->sym = 0;
4057:   } else {
4058:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4059:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4060:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4061:     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4062:     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4063:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4064:     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4065: #if defined(PETSC_USE_COMPLEX)
4066:     mumps->sym = 2;
4067: #else
4068:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4069:     else mumps->sym = 2;
4070: #endif
4071:   }

4073:   /* set solvertype */
4074:   PetscCall(PetscFree(B->solvertype));
4075:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4076:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4077:   if (size == 1) {
4078:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4079:     B->canuseordering = PETSC_TRUE;
4080:   }
4081:   B->ops->destroy = MatDestroy_MUMPS;
4082:   B->data         = (void *)mumps;

4084:   *F               = B;
4085:   mumps->id.job    = JOB_NULL;
4086:   mumps->ICNTL_pre = NULL;
4087:   mumps->CNTL_pre  = NULL;
4088:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4089:   PetscFunctionReturn(PETSC_SUCCESS);
4090: }

4092: /* MatGetFactor for Seq and MPI SBAIJ matrices */
4093: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4094: {
4095:   Mat         B;
4096:   Mat_MUMPS  *mumps;
4097:   PetscBool   isSeqSBAIJ;
4098:   PetscMPIInt size;

4100:   PetscFunctionBegin;
4101: #if defined(PETSC_USE_COMPLEX)
4102:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4103:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4104:     *F = NULL;
4105:     PetscFunctionReturn(PETSC_SUCCESS);
4106:   }
4107: #endif
4108:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4109:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4110:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4111:   PetscCall(MatSetUp(B));

4113:   PetscCall(PetscNew(&mumps));
4114:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4115:   if (isSeqSBAIJ) {
4116:     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4117:   } else {
4118:     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4119:   }

4121:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4122:   B->ops->view                   = MatView_MUMPS;
4123:   B->ops->getinfo                = MatGetInfo_MUMPS;

4125:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4126:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4127:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4128:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4129:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4130:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4131:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4132:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4133:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4134:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4135:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4136:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4137:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4138:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4139:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4141:   B->factortype = MAT_FACTOR_CHOLESKY;
4142: #if defined(PETSC_USE_COMPLEX)
4143:   mumps->sym = 2;
4144: #else
4145:   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4146:   else mumps->sym = 2;
4147: #endif

4149:   /* set solvertype */
4150:   PetscCall(PetscFree(B->solvertype));
4151:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4152:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4153:   if (size == 1) {
4154:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4155:     B->canuseordering = PETSC_TRUE;
4156:   }
4157:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4158:   B->ops->destroy = MatDestroy_MUMPS;
4159:   B->data         = (void *)mumps;

4161:   *F               = B;
4162:   mumps->id.job    = JOB_NULL;
4163:   mumps->ICNTL_pre = NULL;
4164:   mumps->CNTL_pre  = NULL;
4165:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4166:   PetscFunctionReturn(PETSC_SUCCESS);
4167: }

4169: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4170: {
4171:   Mat         B;
4172:   Mat_MUMPS  *mumps;
4173:   PetscBool   isSeqBAIJ;
4174:   PetscMPIInt size;

4176:   PetscFunctionBegin;
4177:   /* Create the factorization matrix */
4178:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4179:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4180:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4181:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4182:   PetscCall(MatSetUp(B));

4184:   PetscCall(PetscNew(&mumps));
4185:   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4186:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4187:   B->factortype            = MAT_FACTOR_LU;
4188:   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4189:   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4190:   mumps->sym = 0;
4191:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4193:   B->ops->view    = MatView_MUMPS;
4194:   B->ops->getinfo = MatGetInfo_MUMPS;

4196:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4197:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4198:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4199:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4200:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4201:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4202:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4203:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4204:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4205:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4206:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4207:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4208:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4209:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4210:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4212:   /* set solvertype */
4213:   PetscCall(PetscFree(B->solvertype));
4214:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4215:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4216:   if (size == 1) {
4217:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4218:     B->canuseordering = PETSC_TRUE;
4219:   }
4220:   B->ops->destroy = MatDestroy_MUMPS;
4221:   B->data         = (void *)mumps;

4223:   *F               = B;
4224:   mumps->id.job    = JOB_NULL;
4225:   mumps->ICNTL_pre = NULL;
4226:   mumps->CNTL_pre  = NULL;
4227:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4228:   PetscFunctionReturn(PETSC_SUCCESS);
4229: }

4231: /* MatGetFactor for SELL matrices; only the sequential (MATSEQSELL) case is currently implemented */
4232: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4233: {
4234:   Mat         B;
4235:   Mat_MUMPS  *mumps;
4236:   PetscBool   isSeqSELL;
4237:   PetscMPIInt size;

4239:   PetscFunctionBegin;
4240:   /* Create the factorization matrix */
4241:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4242:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4243:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4244:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4245:   PetscCall(MatSetUp(B));

4247:   PetscCall(PetscNew(&mumps));

4249:   B->ops->view    = MatView_MUMPS;
4250:   B->ops->getinfo = MatGetInfo_MUMPS;

4252:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4253:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4254:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4255:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4256:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4257:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4258:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4259:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4260:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4261:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4262:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4263:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
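  /* SELL matrices are converted to AIJ-style triples as well; only sequential LU is currently
     supported, as enforced by the checks below */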

4265:   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4266:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4267:   B->factortype            = MAT_FACTOR_LU;
4268:   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4269:   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4270:   mumps->sym              = 0;
4271:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

4273:   /* set solvertype */
4274:   PetscCall(PetscFree(B->solvertype));
4275:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4276:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4277:   if (size == 1) {
4278:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4279:     B->canuseordering = PETSC_TRUE;
4280:   }
4281:   B->ops->destroy = MatDestroy_MUMPS;
4282:   B->data         = (void *)mumps;

4284:   *F               = B;
4285:   mumps->id.job    = JOB_NULL;
4286:   mumps->ICNTL_pre = NULL;
4287:   mumps->CNTL_pre  = NULL;
4288:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4289:   PetscFunctionReturn(PETSC_SUCCESS);
4290: }

4292: /* MatGetFactor for MATNEST matrices */
4293: static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4294: {
4295:   Mat         B, **mats;
4296:   Mat_MUMPS  *mumps;
4297:   PetscInt    nr, nc;
4298:   PetscMPIInt size;
4299:   PetscBool   flg = PETSC_TRUE;

4301:   PetscFunctionBegin;
4302: #if defined(PETSC_USE_COMPLEX)
4303:   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4304:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4305:     *F = NULL;
4306:     PetscFunctionReturn(PETSC_SUCCESS);
4307:   }
4308: #endif

4310:   /* Return *F = NULL if the nest structure does not satisfy the requirements checked below */
4311:   *F = NULL;
4312:   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4313:   if (ftype == MAT_FACTOR_CHOLESKY) {
4314:     IS       *rows, *cols;
4315:     PetscInt *m, *M;

4317:     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4318:     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4319:     PetscCall(MatNestGetISs(A, rows, cols));
4320:     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4321:     if (!flg) {
4322:       PetscCall(PetscFree2(rows, cols));
4323:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4324:       PetscFunctionReturn(PETSC_SUCCESS);
4325:     }
4326:     PetscCall(PetscMalloc2(nr, &m, nr, &M));
4327:     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
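    /* the [min,max] index ranges of the row index sets must be pairwise disjoint */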
4328:     for (PetscInt r = 0; flg && r < nr; r++)
4329:       for (PetscInt k = r + 1; flg && k < nr; k++)
4330:         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4331:     PetscCall(PetscFree2(m, M));
4332:     PetscCall(PetscFree2(rows, cols));
4333:     if (!flg) {
4334:       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4335:       PetscFunctionReturn(PETSC_SUCCESS);
4336:     }
4337:   }
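  /* For LU, every block must be (Seq/MPI) AIJ, BAIJ, dense, or diagonal; for Cholesky, SBAIJ is
     additionally accepted on the diagonal and only the upper-triangular part (c >= r) is examined */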

4339:   for (PetscInt r = 0; r < nr; r++) {
4340:     for (PetscInt c = 0; c < nc; c++) {
4341:       Mat       sub = mats[r][c];
4342:       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

4344:       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4345:       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4346:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4347:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4348:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4349:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4350:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4351:       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4352:       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4353:       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4354:       if (ftype == MAT_FACTOR_CHOLESKY) {
4355:         if (r == c) {
4356:           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4357:             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4358:             flg = PETSC_FALSE;
4359:           }
4360:         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4361:           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4362:           flg = PETSC_FALSE;
4363:         }
4364:       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4365:         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4366:         flg = PETSC_FALSE;
4367:       }
4368:     }
4369:   }
4370:   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

4372:   /* Create the factorization matrix */
4373:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4374:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4375:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4376:   PetscCall(MatSetUp(B));

4378:   PetscCall(PetscNew(&mumps));

4380:   B->ops->view    = MatView_MUMPS;
4381:   B->ops->getinfo = MatGetInfo_MUMPS;

4383:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4384:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4385:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4386:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4387:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4388:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4389:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4390:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4391:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4392:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4393:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4394:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4395:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4396:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4397:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

4399:   if (ftype == MAT_FACTOR_LU) {
4400:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4401:     B->factortype            = MAT_FACTOR_LU;
4402:     mumps->sym               = 0;
4403:   } else {
4404:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4405:     B->factortype                  = MAT_FACTOR_CHOLESKY;
4406: #if defined(PETSC_USE_COMPLEX)
4407:     mumps->sym = 2;
4408: #else
4409:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4410:     else mumps->sym = 2;
4411: #endif
4412:   }
4413:   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4414:   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

4416:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4417:   if (size == 1) {
4418:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4419:     B->canuseordering = PETSC_TRUE;
4420:   }

4422:   /* set solvertype */
4423:   PetscCall(PetscFree(B->solvertype));
4424:   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4425:   B->ops->destroy = MatDestroy_MUMPS;
4426:   B->data         = (void *)mumps;

4428:   *F               = B;
4429:   mumps->id.job    = JOB_NULL;
4430:   mumps->ICNTL_pre = NULL;
4431:   mumps->CNTL_pre  = NULL;
4432:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4433:   PetscFunctionReturn(PETSC_SUCCESS);
4434: }

4436: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4437: {
4438:   PetscFunctionBegin;
4439:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4440:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4441:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4442:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4443:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4444:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4445:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4446:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4447:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4448:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4449:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
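  /* MATDIAGONAL and dense input matrices are handled through the AIJ factory */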
4450:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4451:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4452:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4453:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4454:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4455:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4456:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4457:   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4458:   PetscFunctionReturn(PETSC_SUCCESS);
4459: }
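
/*
   A minimal sketch of typical user code (the Mat A and Vecs b, x are assumed to come from the
   calling application), showing how the factorizations registered above are requested explicitly:

     Mat           F;
     IS            rperm, cperm;
     MatFactorInfo info;

     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &rperm, &cperm));
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatLUFactorSymbolic(F, A, rperm, cperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));

   With a KSP/PC the same solver is selected from the command line, e.g.
     -pc_type lu -pc_factor_mat_solver_type mumps
*/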