Actual source code: mumps.c


  2: /*
  3:     Provides an interface to the MUMPS sparse solver
  4: */
  5: #include <petscpkg_version.h>
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
  8: #include <../src/mat/impls/sell/mpi/mpisell.h>

 10: EXTERN_C_BEGIN
 11: #if defined(PETSC_USE_COMPLEX)
 12:   #if defined(PETSC_USE_REAL_SINGLE)
 13:     #include <cmumps_c.h>
 14:   #else
 15:     #include <zmumps_c.h>
 16:   #endif
 17: #else
 18:   #if defined(PETSC_USE_REAL_SINGLE)
 19:     #include <smumps_c.h>
 20:   #else
 21:     #include <dmumps_c.h>
 22:   #endif
 23: #endif
 24: EXTERN_C_END
 25: #define JOB_INIT         -1
 26: #define JOB_NULL         0
 27: #define JOB_FACTSYMBOLIC 1
 28: #define JOB_FACTNUMERIC  2
 29: #define JOB_SOLVE        3
 30: #define JOB_END          -2

 32: /* calls to MUMPS */
 33: #if defined(PETSC_USE_COMPLEX)
 34:   #if defined(PETSC_USE_REAL_SINGLE)
 35:     #define MUMPS_c cmumps_c
 36:   #else
 37:     #define MUMPS_c zmumps_c
 38:   #endif
 39: #else
 40:   #if defined(PETSC_USE_REAL_SINGLE)
 41:     #define MUMPS_c smumps_c
 42:   #else
 43:     #define MUMPS_c dmumps_c
 44:   #endif
 45: #endif

 47: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
 48:    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
 49:    naming convention in PetscMPIInt, PetscBLASInt etc.
 50: */
 51: typedef MUMPS_INT PetscMUMPSInt;

 53: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
 54:   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is more reliable */
 55:     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
 56:   #endif
 57: #else
 58:   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
 59:     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
 60:   #endif
 61: #endif

 63: #define MPIU_MUMPSINT       MPI_INT
 64: #define PETSC_MUMPS_INT_MAX 2147483647
 65: #define PETSC_MUMPS_INT_MIN -2147483648

 67: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
 68: static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
 69: {
 70: #if PetscDefined(USE_64BIT_INDICES)
 71:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
 72: #endif
 73:   *b = (PetscMUMPSInt)(a);
 74:   return 0;
 75: }
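/* Illustrative sketch (added commentary, not part of the upstream source): typical use of
   PetscMUMPSIntCast() when filling a MUMPS index array from PetscInt data. The names n, idx and
   midx are hypothetical; with 64-bit PetscInt the cast asserts that each value fits in MUMPS_INT.

     PetscMUMPSInt *midx;
     PetscMalloc1(n, &midx);
     for (PetscInt i = 0; i < n; i++) PetscMUMPSIntCast(idx[i] + 1, &midx[i]); // shift to 1-based for MUMPS
*/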

 77: /* Put these utility routines here since they are only used in this file */
 78: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
 79: {
 80:   PetscInt  myval;
 81:   PetscBool myset;
 82:   /* PetscInt is always at least as wide as PetscMUMPSInt, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
 83:   PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub);
 84:   if (myset) PetscMUMPSIntCast(myval, value);
 85:   if (set) *set = myset;
 86:   return 0;
 87: }
 88: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
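/* Illustrative sketch (assumption, not from the upstream source): the wrapper is meant to be used
   inside a PetscOptionsBegin()/PetscOptionsEnd() block just like PetscOptionsInt(); the option name
   and help text below are only examples.

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg);
     if (flg) mumps->id.ICNTL(4) = icntl;
*/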

 90: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks may run on */
 91: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
 92:   #define PetscMUMPS_c(mumps) \
 93:     do { \
 94:       if (mumps->use_petsc_omp_support) { \
 95:         if (mumps->is_omp_master) { \
 96:           PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
 97:           MUMPS_c(&mumps->id); \
 98:           PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
 99:         } \
100:         PetscOmpCtrlBarrier(mumps->omp_ctrl); \
101:         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is      \
102:          process-specific, so we only Bcast info[1] (an error code) and leave the others alone (they   \
103:          have no easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.    \
104:          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105:       */ \
106:         MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm); \
107:         MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm); \
108:         MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm); \
109:       } else { \
110:         MUMPS_c(&mumps->id); \
111:       } \
112:     } while (0)
113: #else
114:   #define PetscMUMPS_c(mumps) \
115:     do { \
116:       MUMPS_c(&mumps->id); \
117:     } while (0)
118: #endif
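/* Illustrative sketch (added commentary): the calling convention used throughout this file. A MUMPS
   phase is selected by setting id.job before invoking the wrapper, and the outcome is read back from
   INFOG(1)/INFO(2):

     mumps->id.job = JOB_FACTNUMERIC;    // or JOB_FACTSYMBOLIC, JOB_SOLVE, JOB_END, ...
     PetscMUMPS_c(mumps);                // runs MUMPS; with OpenMP support, only on master ranks
     if (mumps->id.INFOG(1) < 0) { ... } // handle the MUMPS error code
*/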

120: /* declare MumpsScalar */
121: #if defined(PETSC_USE_COMPLEX)
122:   #if defined(PETSC_USE_REAL_SINGLE)
123:     #define MumpsScalar mumps_complex
124:   #else
125:     #define MumpsScalar mumps_double_complex
126:   #endif
127: #else
128:   #define MumpsScalar PetscScalar
129: #endif

131: /* macros s.t. indices match MUMPS documentation */
132: #define ICNTL(I)  icntl[(I)-1]
133: #define CNTL(I)   cntl[(I)-1]
134: #define INFOG(I)  infog[(I)-1]
135: #define INFO(I)   info[(I)-1]
136: #define RINFOG(I) rinfog[(I)-1]
137: #define RINFO(I)  rinfo[(I)-1]
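/* Illustrative example (added commentary): with these macros the code mirrors the 1-based numbering
   of the MUMPS manual directly, e.g.

     mumps->id.ICNTL(24) = 1;   // expands to mumps->id.icntl[23] = 1 (null-pivot detection)
*/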

139: typedef struct Mat_MUMPS Mat_MUMPS;
140: struct Mat_MUMPS {
141: #if defined(PETSC_USE_COMPLEX)
142:   #if defined(PETSC_USE_REAL_SINGLE)
143:   CMUMPS_STRUC_C id;
144:   #else
145:   ZMUMPS_STRUC_C id;
146:   #endif
147: #else
148:   #if defined(PETSC_USE_REAL_SINGLE)
149:   SMUMPS_STRUC_C id;
150:   #else
151:   DMUMPS_STRUC_C id;
152:   #endif
153: #endif

155:   MatStructure   matstruc;
156:   PetscMPIInt    myid, petsc_size;
157:   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
158:   PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
159:   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
160:   PetscMUMPSInt  sym;
161:   MPI_Comm       mumps_comm;
162:   PetscMUMPSInt *ICNTL_pre;
163:   PetscReal     *CNTL_pre;
164:   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
165:   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
166:   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
167:   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
168: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
169:   PetscInt    *rhs_nrow, max_nrhs;
170:   PetscMPIInt *rhs_recvcounts, *rhs_disps;
171:   PetscScalar *rhs_loc, *rhs_recvbuf;
172: #endif
173:   Vec            b_seq, x_seq;
174:   PetscInt       ninfo, *info; /* which INFO to display */
175:   PetscInt       sizeredrhs;
176:   PetscScalar   *schur_sol;
177:   PetscInt       schur_sizesol;
178:   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
179:   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
180:   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

182:   /* stuff used by petsc/mumps OpenMP support*/
183:   PetscBool    use_petsc_omp_support;
184:   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
185:   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
186:   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
187:   PetscMPIInt  tag, omp_comm_size;
188:   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
189:   MPI_Request *reqs;
190: };
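/* Illustrative note (added commentary): each MUMPS factor Mat stores one Mat_MUMPS instance in its
   data pointer; the routines in this file retrieve it as

     Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
*/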

192: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
193:    Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
194:  */
195: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
196: {
197:   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

199: #if defined(PETSC_USE_64BIT_INDICES)
200:   {
201:     PetscInt i;
202:     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
203:       PetscFree(mumps->ia_alloc);
204:       PetscMalloc1(nrow + 1, &mumps->ia_alloc);
205:       mumps->cur_ilen = nrow + 1;
206:     }
207:     if (nnz > mumps->cur_jlen) {
208:       PetscFree(mumps->ja_alloc);
209:       PetscMalloc1(nnz, &mumps->ja_alloc);
210:       mumps->cur_jlen = nnz;
211:     }
212:     for (i = 0; i < nrow + 1; i++) PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]));
213:     for (i = 0; i < nnz; i++) PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]));
214:     *ia_mumps = mumps->ia_alloc;
215:     *ja_mumps = mumps->ja_alloc;
216:   }
217: #else
218:   *ia_mumps          = ia;
219:   *ja_mumps          = ja;
220: #endif
221:   PetscMUMPSIntCast(nnz, nnz_mumps);
222:   return 0;
223: }
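/* Illustrative sketch: how this helper is used later in this file for the sparse-RHS path. A 1-based
   CSR obtained from MatGetRowIJ() is cast to the MUMPS_INT arrays that MUMPS consumes directly:

     MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
     PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
*/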

225: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
226: {
227:   PetscFree(mumps->id.listvar_schur);
228:   PetscFree(mumps->id.redrhs);
229:   PetscFree(mumps->schur_sol);
230:   mumps->id.size_schur = 0;
231:   mumps->id.schur_lld  = 0;
232:   mumps->id.ICNTL(19)  = 0;
233:   return 0;
234: }

236: /* solve with rhs in mumps->id.redrhs and return in the same location */
237: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
238: {
239:   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
240:   Mat                  S, B, X;
241:   MatFactorSchurStatus schurstatus;
242:   PetscInt             sizesol;

244:   MatFactorFactorizeSchurComplement(F);
245:   MatFactorGetSchurComplement(F, &S, &schurstatus);
246:   MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B);
247:   MatSetType(B, ((PetscObject)S)->type_name);
248: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249:   MatBindToCPU(B, S->boundtocpu);
250: #endif
251:   switch (schurstatus) {
252:   case MAT_FACTOR_SCHUR_FACTORED:
253:     MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X);
254:     MatSetType(X, ((PetscObject)S)->type_name);
255: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256:     MatBindToCPU(X, S->boundtocpu);
257: #endif
258:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
259:       MatMatSolveTranspose(S, B, X);
260:     } else {
261:       MatMatSolve(S, B, X);
262:     }
263:     break;
264:   case MAT_FACTOR_SCHUR_INVERTED:
265:     sizesol = mumps->id.nrhs * mumps->id.size_schur;
266:     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267:       PetscFree(mumps->schur_sol);
268:       PetscMalloc1(sizesol, &mumps->schur_sol);
269:       mumps->schur_sizesol = sizesol;
270:     }
271:     MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X);
272:     MatSetType(X, ((PetscObject)S)->type_name);
273: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274:     MatBindToCPU(X, S->boundtocpu);
275: #endif
276:     MatProductCreateWithMat(S, B, NULL, X);
277:     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278:       MatProductSetType(X, MATPRODUCT_AtB);
279:     } else {
280:       MatProductSetType(X, MATPRODUCT_AB);
281:     }
282:     MatProductSetFromOptions(X);
283:     MatProductSymbolic(X);
284:     MatProductNumeric(X);

286:     MatCopy(X, B, SAME_NONZERO_PATTERN);
287:     break;
288:   default:
289:     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
290:   }
291:   MatFactorRestoreSchurComplement(F, &S, schurstatus);
292:   MatDestroy(&B);
293:   MatDestroy(&X);
294:   return 0;
295: }

297: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298: {
299:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

301:   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
302:     return 0;
303:   }
304:   if (!expansion) { /* prepare for the condensation step */
305:     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
306:     /* allocate MUMPS internal array to store reduced right-hand sides */
307:     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
308:       PetscFree(mumps->id.redrhs);
309:       mumps->id.lredrhs = mumps->id.size_schur;
310:       PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs);
311:       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
312:     }
313:     mumps->id.ICNTL(26) = 1; /* condensation phase */
314:   } else {                   /* prepare for the expansion step */
315:     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
316:     MatMumpsSolveSchur_Private(F);
317:     mumps->id.ICNTL(26) = 2; /* expansion phase */
318:     PetscMUMPS_c(mumps);
320:     /* restore defaults */
321:     mumps->id.ICNTL(26) = -1;
322:     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
323:     if (mumps->id.nrhs > 1) {
324:       PetscFree(mumps->id.redrhs);
325:       mumps->id.lredrhs = 0;
326:       mumps->sizeredrhs = 0;
327:     }
328:   }
329:   return 0;
330: }
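/* Illustrative sketch (added commentary): the condensation/expansion pair brackets a MUMPS solve, as
   done in MatSolve_MUMPS() and MatMatSolve_MUMPS() below:

     MatMumpsHandleSchur_Private(F, PETSC_FALSE); // condensation: ICNTL(26)=1, allocate id.redrhs
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);                         // MUMPS returns the reduced rhs in id.redrhs
     MatMumpsHandleSchur_Private(F, PETSC_TRUE);  // solve the Schur system and expand the solution
*/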

332: /*
333:   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

335:   input:
336:     A       - matrix in aij,baij or sbaij format
337:     shift   - 0: C style output triple; 1: Fortran style output triple.
338:     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
339:               MAT_REUSE_MATRIX:   only the values in v array are updated
340:   output:
341:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
342:     r, c, v - row and col index, matrix values (matrix triples)

344:   The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc(). In MatDestroy_MUMPS() they are
345:   freed with PetscFree(mumps->irn). This is not ideal: because v is only sometimes part of the mumps->irn allocation,
346:   the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

348:  */
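/* Worked example (added commentary): for the 2x2 AIJ matrix
     [ 4  1 ]
     [ 0  3 ]
   with shift = 1 (Fortran-style numbering, as MUMPS expects), the seqaij conversion below produces

     r = {1, 1, 2},  c = {1, 2, 2},  v = {4, 1, 3}

   i.e. one (row, col, value) triple per stored nonzero, with 1-based indices.
*/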

350: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
351: {
352:   const PetscScalar *av;
353:   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
354:   PetscInt64         nz, rnz, i, j, k;
355:   PetscMUMPSInt     *row, *col;
356:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

358:   MatSeqAIJGetArrayRead(A, &av);
359:   mumps->val = (PetscScalar *)av;
360:   if (reuse == MAT_INITIAL_MATRIX) {
361:     nz = aa->nz;
362:     ai = aa->i;
363:     aj = aa->j;
364:     PetscMalloc2(nz, &row, nz, &col);
365:     for (i = k = 0; i < M; i++) {
366:       rnz = ai[i + 1] - ai[i];
367:       ajj = aj + ai[i];
368:       for (j = 0; j < rnz; j++) {
369:         PetscMUMPSIntCast(i + shift, &row[k]);
370:         PetscMUMPSIntCast(ajj[j] + shift, &col[k]);
371:         k++;
372:       }
373:     }
374:     mumps->irn = row;
375:     mumps->jcn = col;
376:     mumps->nnz = nz;
377:   }
378:   MatSeqAIJRestoreArrayRead(A, &av);
379:   return 0;
380: }

382: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
383: {
384:   PetscInt64     nz, i, j, k, r;
385:   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
386:   PetscMUMPSInt *row, *col;

388:   mumps->val = a->val;
389:   if (reuse == MAT_INITIAL_MATRIX) {
390:     nz = a->sliidx[a->totalslices];
391:     PetscMalloc2(nz, &row, nz, &col);
392:     for (i = k = 0; i < a->totalslices; i++) {
393:       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscMUMPSIntCast(8 * i + r + shift, &row[k++]);
394:     }
395:     for (i = 0; i < nz; i++) PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]);
396:     mumps->irn = row;
397:     mumps->jcn = col;
398:     mumps->nnz = nz;
399:   }
400:   return 0;
401: }

403: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
404: {
405:   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
406:   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
407:   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
408:   PetscInt        bs;
409:   PetscMUMPSInt  *row, *col;

411:   MatGetBlockSize(A, &bs);
412:   M          = A->rmap->N / bs;
413:   mumps->val = aa->a;
414:   if (reuse == MAT_INITIAL_MATRIX) {
415:     ai = aa->i;
416:     aj = aa->j;
417:     nz = bs2 * aa->nz;
418:     PetscMalloc2(nz, &row, nz, &col);
419:     for (i = 0; i < M; i++) {
420:       ajj = aj + ai[i];
421:       rnz = ai[i + 1] - ai[i];
422:       for (k = 0; k < rnz; k++) {
423:         for (j = 0; j < bs; j++) {
424:           for (m = 0; m < bs; m++) {
425:             PetscMUMPSIntCast(i * bs + m + shift, &row[idx]);
426:             PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]);
427:             idx++;
428:           }
429:         }
430:       }
431:     }
432:     mumps->irn = row;
433:     mumps->jcn = col;
434:     mumps->nnz = nz;
435:   }
436:   return 0;
437: }

439: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
440: {
441:   const PetscInt *ai, *aj, *ajj;
442:   PetscInt        bs;
443:   PetscInt64      nz, rnz, i, j, k, m;
444:   PetscMUMPSInt  *row, *col;
445:   PetscScalar    *val;
446:   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
447:   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
448: #if defined(PETSC_USE_COMPLEX)
449:   PetscBool isset, hermitian;
450: #endif

452: #if defined(PETSC_USE_COMPLEX)
453:   MatIsHermitianKnown(A, &isset, &hermitian);
455: #endif
456:   ai = aa->i;
457:   aj = aa->j;
458:   MatGetBlockSize(A, &bs);
459:   if (reuse == MAT_INITIAL_MATRIX) {
460:     nz = aa->nz;
461:     PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col);
462:     if (bs > 1) {
463:       PetscMalloc1(bs2 * nz, &mumps->val_alloc);
464:       mumps->val = mumps->val_alloc;
465:     } else {
466:       mumps->val = aa->a;
467:     }
468:     mumps->irn = row;
469:     mumps->jcn = col;
470:   } else {
471:     if (bs == 1) mumps->val = aa->a;
472:     row = mumps->irn;
473:     col = mumps->jcn;
474:   }
475:   val = mumps->val;

477:   nz = 0;
478:   if (bs > 1) {
479:     for (i = 0; i < mbs; i++) {
480:       rnz = ai[i + 1] - ai[i];
481:       ajj = aj + ai[i];
482:       for (j = 0; j < rnz; j++) {
483:         for (k = 0; k < bs; k++) {
484:           for (m = 0; m < bs; m++) {
485:             if (ajj[j] > i || k >= m) {
486:               if (reuse == MAT_INITIAL_MATRIX) {
487:                 PetscMUMPSIntCast(i * bs + m + shift, &row[nz]);
488:                 PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]);
489:               }
490:               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
491:             }
492:           }
493:         }
494:       }
495:     }
496:   } else if (reuse == MAT_INITIAL_MATRIX) {
497:     for (i = 0; i < mbs; i++) {
498:       rnz = ai[i + 1] - ai[i];
499:       ajj = aj + ai[i];
500:       for (j = 0; j < rnz; j++) {
501:         PetscMUMPSIntCast(i + shift, &row[nz]);
502:         PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
503:         nz++;
504:       }
505:     }
507:   }
508:   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
509:   return 0;
510: }

512: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
513: {
514:   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
515:   PetscInt64         nz, rnz, i, j;
516:   const PetscScalar *av, *v1;
517:   PetscScalar       *val;
518:   PetscMUMPSInt     *row, *col;
519:   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
520:   PetscBool          missing;
521: #if defined(PETSC_USE_COMPLEX)
522:   PetscBool hermitian, isset;
523: #endif

525: #if defined(PETSC_USE_COMPLEX)
526:   MatIsHermitianKnown(A, &isset, &hermitian);
528: #endif
529:   MatSeqAIJGetArrayRead(A, &av);
530:   ai    = aa->i;
531:   aj    = aa->j;
532:   adiag = aa->diag;
533:   MatMissingDiagonal_SeqAIJ(A, &missing, NULL);
534:   if (reuse == MAT_INITIAL_MATRIX) {
535:     /* count nz in the upper triangular part of A */
536:     nz = 0;
537:     if (missing) {
538:       for (i = 0; i < M; i++) {
539:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
540:           for (j = ai[i]; j < ai[i + 1]; j++) {
541:             if (aj[j] < i) continue;
542:             nz++;
543:           }
544:         } else {
545:           nz += ai[i + 1] - adiag[i];
546:         }
547:       }
548:     } else {
549:       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
550:     }
551:     PetscMalloc2(nz, &row, nz, &col);
552:     PetscMalloc1(nz, &val);
553:     mumps->nnz = nz;
554:     mumps->irn = row;
555:     mumps->jcn = col;
556:     mumps->val = mumps->val_alloc = val;

558:     nz = 0;
559:     if (missing) {
560:       for (i = 0; i < M; i++) {
561:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
562:           for (j = ai[i]; j < ai[i + 1]; j++) {
563:             if (aj[j] < i) continue;
564:             PetscMUMPSIntCast(i + shift, &row[nz]);
565:             PetscMUMPSIntCast(aj[j] + shift, &col[nz]);
566:             val[nz] = av[j];
567:             nz++;
568:           }
569:         } else {
570:           rnz = ai[i + 1] - adiag[i];
571:           ajj = aj + adiag[i];
572:           v1  = av + adiag[i];
573:           for (j = 0; j < rnz; j++) {
574:             PetscMUMPSIntCast(i + shift, &row[nz]);
575:             PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
576:             val[nz++] = v1[j];
577:           }
578:         }
579:       }
580:     } else {
581:       for (i = 0; i < M; i++) {
582:         rnz = ai[i + 1] - adiag[i];
583:         ajj = aj + adiag[i];
584:         v1  = av + adiag[i];
585:         for (j = 0; j < rnz; j++) {
586:           PetscMUMPSIntCast(i + shift, &row[nz]);
587:           PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
588:           val[nz++] = v1[j];
589:         }
590:       }
591:     }
592:   } else {
593:     nz  = 0;
594:     val = mumps->val;
595:     if (missing) {
596:       for (i = 0; i < M; i++) {
597:         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
598:           for (j = ai[i]; j < ai[i + 1]; j++) {
599:             if (aj[j] < i) continue;
600:             val[nz++] = av[j];
601:           }
602:         } else {
603:           rnz = ai[i + 1] - adiag[i];
604:           v1  = av + adiag[i];
605:           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
606:         }
607:       }
608:     } else {
609:       for (i = 0; i < M; i++) {
610:         rnz = ai[i + 1] - adiag[i];
611:         v1  = av + adiag[i];
612:         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
613:       }
614:     }
615:   }
616:   MatSeqAIJRestoreArrayRead(A, &av);
617:   return 0;
618: }

620: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
621: {
622:   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
623:   PetscInt           bs;
624:   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
625:   PetscMUMPSInt     *row, *col;
626:   const PetscScalar *av, *bv, *v1, *v2;
627:   PetscScalar       *val;
628:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
629:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
630:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
631:   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
632: #if defined(PETSC_USE_COMPLEX)
633:   PetscBool hermitian, isset;
634: #endif

636: #if defined(PETSC_USE_COMPLEX)
637:   MatIsHermitianKnown(A, &isset, &hermitian);
639: #endif
640:   MatGetBlockSize(A, &bs);
641:   rstart = A->rmap->rstart;
642:   ai     = aa->i;
643:   aj     = aa->j;
644:   bi     = bb->i;
645:   bj     = bb->j;
646:   av     = aa->a;
647:   bv     = bb->a;

649:   garray = mat->garray;

651:   if (reuse == MAT_INITIAL_MATRIX) {
652:     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
653:     PetscMalloc2(nz, &row, nz, &col);
654:     PetscMalloc1(nz, &val);
 655:     /* cannot determine the exact mumps->nnz yet because of the SBAIJ block structure */
656:     mumps->irn = row;
657:     mumps->jcn = col;
658:     mumps->val = mumps->val_alloc = val;
659:   } else {
660:     val = mumps->val;
661:   }

663:   jj   = 0;
664:   irow = rstart;
665:   for (i = 0; i < mbs; i++) {
666:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
667:     countA = ai[i + 1] - ai[i];
668:     countB = bi[i + 1] - bi[i];
669:     bjj    = bj + bi[i];
670:     v1     = av + ai[i] * bs2;
671:     v2     = bv + bi[i] * bs2;

673:     if (bs > 1) {
674:       /* A-part */
675:       for (j = 0; j < countA; j++) {
676:         for (k = 0; k < bs; k++) {
677:           for (m = 0; m < bs; m++) {
678:             if (rstart + ajj[j] * bs > irow || k >= m) {
679:               if (reuse == MAT_INITIAL_MATRIX) {
680:                 PetscMUMPSIntCast(irow + m + shift, &row[jj]);
681:                 PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]);
682:               }
683:               val[jj++] = v1[j * bs2 + m + k * bs];
684:             }
685:           }
686:         }
687:       }

689:       /* B-part */
690:       for (j = 0; j < countB; j++) {
691:         for (k = 0; k < bs; k++) {
692:           for (m = 0; m < bs; m++) {
693:             if (reuse == MAT_INITIAL_MATRIX) {
694:               PetscMUMPSIntCast(irow + m + shift, &row[jj]);
695:               PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]);
696:             }
697:             val[jj++] = v2[j * bs2 + m + k * bs];
698:           }
699:         }
700:       }
701:     } else {
702:       /* A-part */
703:       for (j = 0; j < countA; j++) {
704:         if (reuse == MAT_INITIAL_MATRIX) {
705:           PetscMUMPSIntCast(irow + shift, &row[jj]);
706:           PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
707:         }
708:         val[jj++] = v1[j];
709:       }

711:       /* B-part */
712:       for (j = 0; j < countB; j++) {
713:         if (reuse == MAT_INITIAL_MATRIX) {
714:           PetscMUMPSIntCast(irow + shift, &row[jj]);
715:           PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
716:         }
717:         val[jj++] = v2[j];
718:       }
719:     }
720:     irow += bs;
721:   }
722:   mumps->nnz = jj;
723:   return 0;
724: }

726: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
727: {
728:   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
729:   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
730:   PetscMUMPSInt     *row, *col;
731:   const PetscScalar *av, *bv, *v1, *v2;
732:   PetscScalar       *val;
733:   Mat                Ad, Ao;
734:   Mat_SeqAIJ        *aa;
735:   Mat_SeqAIJ        *bb;

737:   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray);
738:   MatSeqAIJGetArrayRead(Ad, &av);
739:   MatSeqAIJGetArrayRead(Ao, &bv);

741:   aa = (Mat_SeqAIJ *)(Ad)->data;
742:   bb = (Mat_SeqAIJ *)(Ao)->data;
743:   ai = aa->i;
744:   aj = aa->j;
745:   bi = bb->i;
746:   bj = bb->j;

748:   rstart = A->rmap->rstart;

750:   if (reuse == MAT_INITIAL_MATRIX) {
751:     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
752:     PetscMalloc2(nz, &row, nz, &col);
753:     PetscMalloc1(nz, &val);
754:     mumps->nnz = nz;
755:     mumps->irn = row;
756:     mumps->jcn = col;
757:     mumps->val = mumps->val_alloc = val;
758:   } else {
759:     val = mumps->val;
760:   }

762:   jj   = 0;
763:   irow = rstart;
764:   for (i = 0; i < m; i++) {
765:     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
766:     countA = ai[i + 1] - ai[i];
767:     countB = bi[i + 1] - bi[i];
768:     bjj    = bj + bi[i];
769:     v1     = av + ai[i];
770:     v2     = bv + bi[i];

772:     /* A-part */
773:     for (j = 0; j < countA; j++) {
774:       if (reuse == MAT_INITIAL_MATRIX) {
775:         PetscMUMPSIntCast(irow + shift, &row[jj]);
776:         PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
777:       }
778:       val[jj++] = v1[j];
779:     }

781:     /* B-part */
782:     for (j = 0; j < countB; j++) {
783:       if (reuse == MAT_INITIAL_MATRIX) {
784:         PetscMUMPSIntCast(irow + shift, &row[jj]);
785:         PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
786:       }
787:       val[jj++] = v2[j];
788:     }
789:     irow++;
790:   }
791:   MatSeqAIJRestoreArrayRead(Ad, &av);
792:   MatSeqAIJRestoreArrayRead(Ao, &bv);
793:   return 0;
794: }

796: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
797: {
798:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
799:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
800:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
801:   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
802:   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
803:   const PetscInt     bs2 = mat->bs2;
804:   PetscInt           bs;
805:   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
806:   PetscMUMPSInt     *row, *col;
807:   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
808:   PetscScalar       *val;

810:   MatGetBlockSize(A, &bs);
811:   if (reuse == MAT_INITIAL_MATRIX) {
812:     nz = bs2 * (aa->nz + bb->nz);
813:     PetscMalloc2(nz, &row, nz, &col);
814:     PetscMalloc1(nz, &val);
815:     mumps->nnz = nz;
816:     mumps->irn = row;
817:     mumps->jcn = col;
818:     mumps->val = mumps->val_alloc = val;
819:   } else {
820:     val = mumps->val;
821:   }

823:   jj   = 0;
824:   irow = rstart;
825:   for (i = 0; i < mbs; i++) {
826:     countA = ai[i + 1] - ai[i];
827:     countB = bi[i + 1] - bi[i];
828:     ajj    = aj + ai[i];
829:     bjj    = bj + bi[i];
830:     v1     = av + bs2 * ai[i];
831:     v2     = bv + bs2 * bi[i];

833:     idx = 0;
834:     /* A-part */
835:     for (k = 0; k < countA; k++) {
836:       for (j = 0; j < bs; j++) {
837:         for (n = 0; n < bs; n++) {
838:           if (reuse == MAT_INITIAL_MATRIX) {
839:             PetscMUMPSIntCast(irow + n + shift, &row[jj]);
840:             PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]);
841:           }
842:           val[jj++] = v1[idx++];
843:         }
844:       }
845:     }

847:     idx = 0;
848:     /* B-part */
849:     for (k = 0; k < countB; k++) {
850:       for (j = 0; j < bs; j++) {
851:         for (n = 0; n < bs; n++) {
852:           if (reuse == MAT_INITIAL_MATRIX) {
853:             PetscMUMPSIntCast(irow + n + shift, &row[jj]);
854:             PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]);
855:           }
856:           val[jj++] = v2[idx++];
857:         }
858:       }
859:     }
860:     irow += bs;
861:   }
862:   return 0;
863: }

865: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
866: {
867:   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
868:   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
869:   PetscMUMPSInt     *row, *col;
870:   const PetscScalar *av, *bv, *v1, *v2;
871:   PetscScalar       *val;
872:   Mat                Ad, Ao;
873:   Mat_SeqAIJ        *aa;
874:   Mat_SeqAIJ        *bb;
875: #if defined(PETSC_USE_COMPLEX)
876:   PetscBool hermitian, isset;
877: #endif

879: #if defined(PETSC_USE_COMPLEX)
880:   MatIsHermitianKnown(A, &isset, &hermitian);
882: #endif
883:   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray);
884:   MatSeqAIJGetArrayRead(Ad, &av);
885:   MatSeqAIJGetArrayRead(Ao, &bv);

887:   aa    = (Mat_SeqAIJ *)(Ad)->data;
888:   bb    = (Mat_SeqAIJ *)(Ao)->data;
889:   ai    = aa->i;
890:   aj    = aa->j;
891:   adiag = aa->diag;
892:   bi    = bb->i;
893:   bj    = bb->j;

895:   rstart = A->rmap->rstart;

897:   if (reuse == MAT_INITIAL_MATRIX) {
898:     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
899:     nzb = 0; /* num of upper triangular entries in mat->B */
900:     for (i = 0; i < m; i++) {
901:       nza += (ai[i + 1] - adiag[i]);
902:       countB = bi[i + 1] - bi[i];
903:       bjj    = bj + bi[i];
904:       for (j = 0; j < countB; j++) {
905:         if (garray[bjj[j]] > rstart) nzb++;
906:       }
907:     }

909:     nz = nza + nzb; /* total nz of upper triangular part of mat */
910:     PetscMalloc2(nz, &row, nz, &col);
911:     PetscMalloc1(nz, &val);
912:     mumps->nnz = nz;
913:     mumps->irn = row;
914:     mumps->jcn = col;
915:     mumps->val = mumps->val_alloc = val;
916:   } else {
917:     val = mumps->val;
918:   }

920:   jj   = 0;
921:   irow = rstart;
922:   for (i = 0; i < m; i++) {
923:     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
924:     v1     = av + adiag[i];
925:     countA = ai[i + 1] - adiag[i];
926:     countB = bi[i + 1] - bi[i];
927:     bjj    = bj + bi[i];
928:     v2     = bv + bi[i];

930:     /* A-part */
931:     for (j = 0; j < countA; j++) {
932:       if (reuse == MAT_INITIAL_MATRIX) {
933:         PetscMUMPSIntCast(irow + shift, &row[jj]);
934:         PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
935:       }
936:       val[jj++] = v1[j];
937:     }

939:     /* B-part */
940:     for (j = 0; j < countB; j++) {
941:       if (garray[bjj[j]] > rstart) {
942:         if (reuse == MAT_INITIAL_MATRIX) {
943:           PetscMUMPSIntCast(irow + shift, &row[jj]);
944:           PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
945:         }
946:         val[jj++] = v2[j];
947:       }
948:     }
949:     irow++;
950:   }
951:   MatSeqAIJRestoreArrayRead(Ad, &av);
952:   MatSeqAIJRestoreArrayRead(Ao, &bv);
953:   return 0;
954: }

956: PetscErrorCode MatDestroy_MUMPS(Mat A)
957: {
958:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

960:   PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc);
961:   VecScatterDestroy(&mumps->scat_rhs);
962:   VecScatterDestroy(&mumps->scat_sol);
963:   VecDestroy(&mumps->b_seq);
964:   VecDestroy(&mumps->x_seq);
965:   PetscFree(mumps->id.perm_in);
966:   PetscFree2(mumps->irn, mumps->jcn);
967:   PetscFree(mumps->val_alloc);
968:   PetscFree(mumps->info);
969:   PetscFree(mumps->ICNTL_pre);
970:   PetscFree(mumps->CNTL_pre);
971:   MatMumpsResetSchur_Private(mumps);
972:   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
973:     mumps->id.job = JOB_END;
974:     PetscMUMPS_c(mumps);
976:     if (mumps->mumps_comm != MPI_COMM_NULL) {
977:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) MPI_Comm_free(&mumps->mumps_comm);
978:       else PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm);
979:     }
980:   }
981: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
982:   if (mumps->use_petsc_omp_support) {
983:     PetscOmpCtrlDestroy(&mumps->omp_ctrl);
984:     PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf);
985:     PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps);
986:   }
987: #endif
988:   PetscFree(mumps->ia_alloc);
989:   PetscFree(mumps->ja_alloc);
990:   PetscFree(mumps->recvcount);
991:   PetscFree(mumps->reqs);
992:   PetscFree(mumps->irhs_loc);
993:   PetscFree(A->data);

995:   /* clear composed functions */
996:   PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
997:   PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL);
998:   PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL);
999:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL);
1000:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL);
1001:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL);
1002:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL);
1003:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL);
1004:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL);
1005:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL);
1006:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL);
1007:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL);
1008:   PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL);
1009:   return 0;
1010: }

1012: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local process. */
1013: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1014: {
1015:   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1016:   const PetscMPIInt ompsize = mumps->omp_comm_size;
1017:   PetscInt          i, m, M, rstart;

1019:   MatGetSize(A, &M, NULL);
1020:   MatGetLocalSize(A, &m, NULL);
1022:   if (ompsize == 1) {
1023:     if (!mumps->irhs_loc) {
1024:       mumps->nloc_rhs = m;
1025:       PetscMalloc1(m, &mumps->irhs_loc);
1026:       MatGetOwnershipRange(A, &rstart, NULL);
1027:       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1028:     }
1029:     mumps->id.rhs_loc = (MumpsScalar *)array;
1030:   } else {
1031: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1032:     const PetscInt *ranges;
1033:     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1034:     MPI_Group       petsc_group, omp_group;
1035:     PetscScalar    *recvbuf = NULL;

1037:     if (mumps->is_omp_master) {
1038:       /* Lazily initialize the omp stuff for distributed rhs */
1039:       if (!mumps->irhs_loc) {
1040:         PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks);
1041:         PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps);
1042:         MPI_Comm_group(mumps->petsc_comm, &petsc_group);
1043:         MPI_Comm_group(mumps->omp_comm, &omp_group);
1044:         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1045:         MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks);

1047:         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1048:         mumps->nloc_rhs = 0;
1049:         MatGetOwnershipRanges(A, &ranges);
1050:         for (j = 0; j < ompsize; j++) {
1051:           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1052:           mumps->nloc_rhs += mumps->rhs_nrow[j];
1053:         }
1054:         PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc);
1055:         for (j = k = 0; j < ompsize; j++) {
1056:           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1057:         }

1059:         PetscFree2(omp_ranks, petsc_ranks);
1060:         MPI_Group_free(&petsc_group);
1061:         MPI_Group_free(&omp_group);
1062:       }

1064:       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1065:       if (nrhs > mumps->max_nrhs) {
1066:         PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf);
1067:         PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf);
1068:         mumps->max_nrhs = nrhs;
1069:       }

1071:       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1072:       for (j = 0; j < ompsize; j++) PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]);
1073:       mumps->rhs_disps[0] = 0;
1074:       for (j = 1; j < ompsize; j++) {
1075:         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1077:       }
1078:       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1079:     }

1081:     PetscMPIIntCast(m * nrhs, &sendcount);
1082:     MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm);

1084:     if (mumps->is_omp_master) {
1085:       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1086:         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1087:         for (j = 0; j < ompsize; j++) {
1088:           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1089:           dst                    = dstbase;
1090:           for (i = 0; i < nrhs; i++) {
1091:             PetscArraycpy(dst, src, mumps->rhs_nrow[j]);
1092:             src += mumps->rhs_nrow[j];
1093:             dst += mumps->nloc_rhs;
1094:           }
1095:           dstbase += mumps->rhs_nrow[j];
1096:         }
1097:       }
1098:       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1099:     }
1100: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1101:   }
1102:   mumps->id.nrhs     = nrhs;
1103:   mumps->id.nloc_rhs = mumps->nloc_rhs;
1104:   mumps->id.lrhs_loc = mumps->nloc_rhs;
1105:   mumps->id.irhs_loc = mumps->irhs_loc;
1106:   return 0;
1107: }
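/* Illustrative example (added commentary): with 2 ranks in omp_comm, 2 local rows each and nrhs = 2,
   the MPI_Gatherv in the routine above leaves rhs_recvbuf as per-rank blocks, each column-major over
   that rank's rows,

     [ r0c0 r0c0 r0c1 r0c1 | r1c0 r1c0 r1c1 r1c1 ]

   and the copy loop rearranges them into the global column-major layout MUMPS expects in rhs_loc,

     [ r0c0 r0c0 r1c0 r1c0 | r0c1 r0c1 r1c1 r1c1 ]

   where rXcY denotes an entry owned by rank X belonging to rhs column Y.
*/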

1109: PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1110: {
1111:   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1112:   const PetscScalar *rarray = NULL;
1113:   PetscScalar       *array;
1114:   IS                 is_iden, is_petsc;
1115:   PetscInt           i;
1116:   PetscBool          second_solve = PETSC_FALSE;
1117:   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

1119:   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1120:                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1121:                                    &cite1));
1122:   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1123:                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1124:                                    &cite2));

1126:   if (A->factorerrortype) {
1127:     PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1128:     VecSetInf(x);
1129:     return 0;
1130:   }

1132:   mumps->id.nrhs = 1;
1133:   if (mumps->petsc_size > 1) {
1134:     if (mumps->ICNTL20 == 10) {
1135:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1136:       VecGetArrayRead(b, &rarray);
1137:       MatMumpsSetUpDistRHSInfo(A, 1, rarray);
1138:     } else {
1139:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1140:       VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD);
1141:       VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD);
1142:       if (!mumps->myid) {
1143:         VecGetArray(mumps->b_seq, &array);
1144:         mumps->id.rhs = (MumpsScalar *)array;
1145:       }
1146:     }
1147:   } else {                   /* petsc_size == 1 */
1148:     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1149:     VecCopy(b, x);
1150:     VecGetArray(x, &array);
1151:     mumps->id.rhs = (MumpsScalar *)array;
1152:   }

1154:   /*
1155:      handle condensation step of Schur complement (if any)
1156:      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1157:      According to the MUMPS (5.0.0) manual, any such value should be harmless during the factorization phase.
1158:      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1159:      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1160:   */
1161:   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1163:     second_solve = PETSC_TRUE;
1164:     MatMumpsHandleSchur_Private(A, PETSC_FALSE);
1165:   }
1166:   /* solve phase */
1167:   /*-------------*/
1168:   mumps->id.job = JOB_SOLVE;
1169:   PetscMUMPS_c(mumps);

1172:   /* handle expansion step of Schur complement (if any) */
1173:   if (second_solve) MatMumpsHandleSchur_Private(A, PETSC_TRUE);

1175:   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1176:     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1177:       /* when id.ICNTL(9) changes, the contents of sol_loc/isol_loc may change (though not their size lsol_loc), so recreate scat_sol */
1178:       VecScatterDestroy(&mumps->scat_sol);
1179:     }
1180:     if (!mumps->scat_sol) { /* create scatter scat_sol */
1181:       PetscInt *isol2_loc = NULL;
1182:       ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden); /* from */
1183:       PetscMalloc1(mumps->id.lsol_loc, &isol2_loc);
1184:       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1185:       ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc); /* to */
1186:       VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol);
1187:       ISDestroy(&is_iden);
1188:       ISDestroy(&is_petsc);
1189:       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1190:     }

1192:     VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD);
1193:     VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD);
1194:   }

1196:   if (mumps->petsc_size > 1) {
1197:     if (mumps->ICNTL20 == 10) {
1198:       VecRestoreArrayRead(b, &rarray);
1199:     } else if (!mumps->myid) {
1200:       VecRestoreArray(mumps->b_seq, &array);
1201:     }
1202:   } else VecRestoreArray(x, &array);

1204:   PetscLogFlops(2.0 * mumps->id.RINFO(3));
1205:   return 0;
1206: }
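/* Illustrative sketch (assumption, not from the upstream source): how MatSolve_MUMPS() is typically
   reached from user code through the generic PETSc factorization interface; A, b, x are the user's
   matrix and vectors.

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatLUFactorSymbolic(F, A, NULL, NULL, &info);
     MatLUFactorNumeric(F, A, &info);
     MatSolve(F, b, x); // dispatches here
*/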

1208: PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1209: {
1210:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

1212:   mumps->id.ICNTL(9) = 0;
1213:   MatSolve_MUMPS(A, b, x);
1214:   mumps->id.ICNTL(9) = 1;
1215:   return 0;
1216: }

1218: PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1219: {
1220:   Mat                Bt = NULL;
1221:   PetscBool          denseX, denseB, flg, flgT;
1222:   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1223:   PetscInt           i, nrhs, M;
1224:   PetscScalar       *array;
1225:   const PetscScalar *rbray;
1226:   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1227:   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1228:   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1229:   IS                 is_to, is_from;
1230:   PetscInt           k, proc, j, m, myrstart;
1231:   const PetscInt    *rstart;
1232:   Vec                v_mpi, msol_loc;
1233:   VecScatter         scat_sol;
1234:   Vec                b_seq;
1235:   VecScatter         scat_rhs;
1236:   PetscScalar       *aa;
1237:   PetscInt           spnr, *ia, *ja;
1238:   Mat_MPIAIJ        *b = NULL;

1240:   PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL);

1243:   PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL);
1244:   if (denseB) {
1246:     mumps->id.ICNTL(20) = 0; /* dense RHS */
1247:   } else {                   /* sparse B */
1249:     PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT);
1250:     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1251:                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1252:       MatTransposeGetMat(B, &Bt);
1253:     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1254:     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1255:   }

1257:   MatGetSize(B, &M, &nrhs);
1258:   mumps->id.nrhs = nrhs;
1259:   mumps->id.lrhs = M;
1260:   mumps->id.rhs  = NULL;

1262:   if (mumps->petsc_size == 1) {
1263:     PetscScalar *aa;
1264:     PetscInt     spnr, *ia, *ja;
1265:     PetscBool    second_solve = PETSC_FALSE;

1267:     MatDenseGetArray(X, &array);
1268:     mumps->id.rhs = (MumpsScalar *)array;

1270:     if (denseB) {
1271:       /* copy B to X */
1272:       MatDenseGetArrayRead(B, &rbray);
1273:       PetscArraycpy(array, rbray, M * nrhs);
1274:       MatDenseRestoreArrayRead(B, &rbray);
1275:     } else { /* sparse B */
1276:       MatSeqAIJGetArray(Bt, &aa);
1277:       MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1279:       PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
1280:       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1281:     }
1282:     /* handle condensation step of Schur complement (if any) */
1283:     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1284:       second_solve = PETSC_TRUE;
1285:       MatMumpsHandleSchur_Private(A, PETSC_FALSE);
1286:     }
1287:     /* solve phase */
1288:     /*-------------*/
1289:     mumps->id.job = JOB_SOLVE;
1290:     PetscMUMPS_c(mumps);

1293:     /* handle expansion step of Schur complement (if any) */
1294:     if (second_solve) MatMumpsHandleSchur_Private(A, PETSC_TRUE);
1295:     if (!denseB) { /* sparse B */
1296:       MatSeqAIJRestoreArray(Bt, &aa);
1297:       MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1299:     }
1300:     MatDenseRestoreArray(X, &array);
1301:     return 0;
1302:   }

1304:   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/

1307:   /* create msol_loc to hold mumps local solution */
1308:   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1309:   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

1311:   lsol_loc  = mumps->id.lsol_loc;
1312:   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1313:   PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc);
1314:   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1315:   mumps->id.isol_loc = isol_loc;

1317:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc);

1319:   if (denseB) {
1320:     if (mumps->ICNTL20 == 10) {
1321:       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1322:       MatDenseGetArrayRead(B, &rbray);
1323:       MatMumpsSetUpDistRHSInfo(A, nrhs, rbray);
1324:       MatDenseRestoreArrayRead(B, &rbray);
1325:       MatGetLocalSize(B, &m, NULL);
1326:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi);
1327:     } else {
1328:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1329:       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here cannot be realized as an MPI_Gather, resulting in
1330:         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B on rank 0, then on rank
1331:         0 re-arrange B into the desired order, which is a local operation.
1332:       */

1334:       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1335:       /* wrap dense rhs matrix B into a vector v_mpi */
1336:       MatGetLocalSize(B, &m, NULL);
1337:       MatDenseGetArray(B, &bray);
1338:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi);
1339:       MatDenseRestoreArray(B, &bray);

1341:       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1342:       if (!mumps->myid) {
1343:         PetscInt *idx;
1344:         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1345:         PetscMalloc1(nrhs * M, &idx);
1346:         MatGetOwnershipRanges(B, &rstart);
1347:         k = 0;
1348:         for (proc = 0; proc < mumps->petsc_size; proc++) {
1349:           for (j = 0; j < nrhs; j++) {
1350:             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1351:           }
1352:         }

1354:         VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq);
1355:         ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to);
1356:         ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from);
1357:       } else {
1358:         VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq);
1359:         ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to);
1360:         ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from);
1361:       }
1362:       VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs);
1363:       VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD);
1364:       ISDestroy(&is_to);
1365:       ISDestroy(&is_from);
1366:       VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD);

1368:       if (!mumps->myid) { /* define rhs on the host */
1369:         VecGetArray(b_seq, &bray);
1370:         mumps->id.rhs = (MumpsScalar *)bray;
1371:         VecRestoreArray(b_seq, &bray);
1372:       }
1373:     }
1374:   } else { /* sparse B */
1375:     b = (Mat_MPIAIJ *)Bt->data;

1377:     /* wrap dense X into a vector v_mpi */
1378:     MatGetLocalSize(X, &m, NULL);
1379:     MatDenseGetArray(X, &bray);
1380:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi);
1381:     MatDenseRestoreArray(X, &bray);

1383:     if (!mumps->myid) {
1384:       MatSeqAIJGetArray(b->A, &aa);
1385:       MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1387:       PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
1388:       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1389:     } else {
1390:       mumps->id.irhs_ptr    = NULL;
1391:       mumps->id.irhs_sparse = NULL;
1392:       mumps->id.nz_rhs      = 0;
1393:       mumps->id.rhs_sparse  = NULL;
1394:     }
1395:   }

1397:   /* solve phase */
1398:   /*-------------*/
1399:   mumps->id.job = JOB_SOLVE;
1400:   PetscMUMPS_c(mumps);

1403:   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1404:   MatDenseGetArray(X, &array);
1405:   VecPlaceArray(v_mpi, array);

1407:   /* create scatter scat_sol */
1408:   MatGetOwnershipRanges(X, &rstart);
1409:   /* iidx: index used to scatter the mumps solution to petsc X */

1411:   ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from);
1412:   PetscMalloc1(nlsol_loc, &idxx);
1413:   for (i = 0; i < lsol_loc; i++) {
1414:     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

1416:     for (proc = 0; proc < mumps->petsc_size; proc++) {
1417:       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1418:         myrstart = rstart[proc];
1419:         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1420:         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1421:         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1422:         break;
1423:       }
1424:     }

1426:     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1427:   }
1428:   ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to);
1429:   VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol);
1430:   VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD);
1431:   ISDestroy(&is_from);
1432:   ISDestroy(&is_to);
1433:   VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD);
1434:   MatDenseRestoreArray(X, &array);

1436:   /* free spaces */
1437:   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1438:   mumps->id.isol_loc = isol_loc_save;

1440:   PetscFree2(sol_loc, isol_loc);
1441:   PetscFree(idxx);
1442:   VecDestroy(&msol_loc);
1443:   VecDestroy(&v_mpi);
1444:   if (!denseB) {
1445:     if (!mumps->myid) {
1446:       b = (Mat_MPIAIJ *)Bt->data;
1447:       MatSeqAIJRestoreArray(b->A, &aa);
1448:       MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1450:     }
1451:   } else {
1452:     if (mumps->ICNTL20 == 0) {
1453:       VecDestroy(&b_seq);
1454:       VecScatterDestroy(&scat_rhs);
1455:     }
1456:   }
1457:   VecScatterDestroy(&scat_sol);
1458:   PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3));
1459:   return 0;
1460: }
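/*
   Worked example (illustrative only, not used by the code above) of the index mapping built in idxx[]:
   suppose two ranks own rows [0,3) and [3,5) of X and nrhs = 2, so rstart = {0,3,5}. MUMPS may return
   the solution entry for global row 4 on any rank; the loop above then finds proc = 1, myrstart = 3,
   k = 1, m = 2 and iidx = k + myrstart*nrhs = 7, so the entry for right-hand side j lands at
   idxx = 7 + j*m in the contiguous column-major storage of X that v_mpi wraps.
*/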

1462: PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1463: {
1464:   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1465:   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);

1467:   mumps->id.ICNTL(9) = 0;
1468:   MatMatSolve_MUMPS(A, B, X);
1469:   mumps->id.ICNTL(9) = oldvalue;
1470:   return 0;
1471: }

1473: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1474: {
1475:   PetscBool flg;
1476:   Mat       B;

1478:   PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL);

1481:   /* Create B=Bt^T that uses Bt's data structure */
1482:   MatCreateTranspose(Bt, &B);

1484:   MatMatSolve_MUMPS(A, B, X);
1485:   MatDestroy(&B);
1486:   return 0;
1487: }
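/*
   Illustrative usage sketch (not part of this file): solving with multiple right-hand sides through the
   public API. Assumes F was obtained from MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F) and factored,
   B and X are dense matrices of matching sizes, and Bt is an AIJ matrix holding the right-hand sides as rows:

     MatMatSolve(F, B, X);              X = inv(A) B,    handled by MatMatSolve_MUMPS()
     MatMatTransposeSolve(F, Bt, X);    X = inv(A) Bt^T, handled by the routine above
*/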

1489: #if !defined(PETSC_USE_COMPLEX)
1490: /*
1491:   input:
1492:    F:        numeric factor
1493:   output:
1494:    nneg:     total number of negative pivots
1495:    nzero:    total number of zero pivots
1496:    npos:     (global dimension of F) - nneg - nzero
1497: */
1498: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1499: {
1500:   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1501:   PetscMPIInt size;

1503:   MPI_Comm_size(PetscObjectComm((PetscObject)F), &size);
1504:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */

1507:   if (nneg) *nneg = mumps->id.INFOG(12);
1508:   if (nzero || npos) {
1510:     if (nzero) *nzero = mumps->id.INFOG(28);
1511:     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1512:   }
1513:   return 0;
1514: }
1515: #endif
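/*
   Illustrative usage sketch (not part of this file) of the inertia computation above, available in real
   builds only. Assumes a symmetric matrix A; the NULL ordering is assumed acceptable since this interface
   ignores it:

     Mat           F;
     MatFactorInfo info;
     PetscInt      nneg, nzero, npos;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F);
     MatCholeskyFactorSymbolic(F, A, NULL, &info);
     MatCholeskyFactorNumeric(F, A, &info);
     MatGetInertia(F, &nneg, &nzero, &npos);            dispatches to MatGetInertia_SBAIJMUMPS() above

   In parallel, ICNTL(13)=1 (e.g. -mat_mumps_icntl_13 1) keeps the root node off ScaLAPACK, as noted in the
   comment above; if nzero is requested, null-pivot detection (-mat_mumps_icntl_24 1) is typically needed as well.
*/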

1517: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1518: {
1519:   PetscInt       i, nreqs;
1520:   PetscMUMPSInt *irn, *jcn;
1521:   PetscMPIInt    count;
1522:   PetscInt64     totnnz, remain;
1523:   const PetscInt osize = mumps->omp_comm_size;
1524:   PetscScalar   *val;

1526:   if (osize > 1) {
1527:     if (reuse == MAT_INITIAL_MATRIX) {
1528:       /* master first gathers counts of nonzeros to receive */
1529:       if (mumps->is_omp_master) PetscMalloc1(osize, &mumps->recvcount);
1530:       MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm);

1532:       /* Then each computes number of send/recvs */
1533:       if (mumps->is_omp_master) {
1534:         /* Start from 1 since self communication is not done in MPI */
1535:         nreqs = 0;
1536:         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1537:       } else {
1538:         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1539:       }
1540:       PetscMalloc1(nreqs * 3, &mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */

1542:       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1543:          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1544:          might be a prime number > 2^31-1, we have to slice the message. Note that omp_comm_size
1545:          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1546:        */

1547:       nreqs = 0; /* counter for actual send/recvs */
1548:       if (mumps->is_omp_master) {
1549:         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550:         PetscMalloc2(totnnz, &irn, totnnz, &jcn);
1551:         PetscMalloc1(totnnz, &val);

1553:         /* Self communication */
1554:         PetscArraycpy(irn, mumps->irn, mumps->nnz);
1555:         PetscArraycpy(jcn, mumps->jcn, mumps->nnz);
1556:         PetscArraycpy(val, mumps->val, mumps->nnz);

1558:         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559:         PetscFree2(mumps->irn, mumps->jcn);
1560:         PetscFree(mumps->val_alloc);
1561:         mumps->nnz = totnnz;
1562:         mumps->irn = irn;
1563:         mumps->jcn = jcn;
1564:         mumps->val = mumps->val_alloc = val;

1566:         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567:         jcn += mumps->recvcount[0];
1568:         val += mumps->recvcount[0];

1570:         /* Remote communication */
1571:         for (i = 1; i < osize; i++) {
1572:           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1573:           remain = mumps->recvcount[i] - count;
1574:           while (count > 0) {
1575:             MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1576:             MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1577:             MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1578:             irn += count;
1579:             jcn += count;
1580:             val += count;
1581:             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1582:             remain -= count;
1583:           }
1584:         }
1585:       } else {
1586:         irn    = mumps->irn;
1587:         jcn    = mumps->jcn;
1588:         val    = mumps->val;
1589:         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1590:         remain = mumps->nnz - count;
1591:         while (count > 0) {
1592:           MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1593:           MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1594:           MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1595:           irn += count;
1596:           jcn += count;
1597:           val += count;
1598:           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1599:           remain -= count;
1600:         }
1601:       }
1602:     } else {
1603:       nreqs = 0;
1604:       if (mumps->is_omp_master) {
1605:         val = mumps->val + mumps->recvcount[0];
1606:         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1607:           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1608:           remain = mumps->recvcount[i] - count;
1609:           while (count > 0) {
1610:             MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1611:             val += count;
1612:             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1613:             remain -= count;
1614:           }
1615:         }
1616:       } else {
1617:         val    = mumps->val;
1618:         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1619:         remain = mumps->nnz - count;
1620:         while (count > 0) {
1621:           MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1622:           val += count;
1623:           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1624:           remain -= count;
1625:         }
1626:       }
1627:     }
1628:     MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE);
1629:     mumps->tag++; /* It is fine for the send/recvs above to share one MPI tag */
1630:   }
1631:   return 0;
1632: }
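/*
   Illustrative numbers for the message slicing above (not used by the code): with PETSC_MPI_INT_MAX = 2^31-1
   and a rank contributing recvcount[i] = 3*2^30 entries, the while-loop posts two messages per array
   (irn, jcn, val): one of 2^31-1 entries followed by one of 2^30+1 entries, after which remain reaches 0
   and count becomes 0, ending the loop.
*/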

1634: PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1635: {
1636:   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1637:   PetscBool  isMPIAIJ;

1639:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* an empty matrix (INFOG(1)=-16, INFO(2)=0) is not treated as an error */
1640:     if (mumps->id.INFOG(1) == -6) PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1641:     PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1642:     return 0;
1643:   }

1645:   (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1646:   MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps);

1648:   /* numerical factorization phase */
1649:   /*-------------------------------*/
1650:   mumps->id.job = JOB_FACTNUMERIC;
1651:   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1652:     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1653:   } else {
1654:     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1655:   }
1656:   PetscMUMPS_c(mumps);
1657:   if (mumps->id.INFOG(1) < 0) {
1659:     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1660:       PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1661:       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1662:     } else if (mumps->id.INFOG(1) == -13) {
1663:       PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1664:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1665:     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1666:       PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1667:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1668:     } else {
1669:       PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1670:       F->factorerrortype = MAT_FACTOR_OTHER;
1671:     }
1672:   }

1675:   F->assembled = PETSC_TRUE;

1677:   if (F->schur) { /* reset Schur status to unfactored */
1678: #if defined(PETSC_HAVE_CUDA)
1679:     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1680: #endif
1681:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1682:       mumps->id.ICNTL(19) = 2;
1683:       MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur);
1684:     }
1685:     MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED);
1686:   }

1688:   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1689:   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

1691:   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1692:   if (mumps->petsc_size > 1) {
1693:     PetscInt     lsol_loc;
1694:     PetscScalar *sol_loc;

1696:     PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ);

1698:     /* distributed solution; Create x_seq=sol_loc for repeated use */
1699:     if (mumps->x_seq) {
1700:       VecScatterDestroy(&mumps->scat_sol);
1701:       PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc);
1702:       VecDestroy(&mumps->x_seq);
1703:     }
1704:     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1705:     PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc);
1706:     mumps->id.lsol_loc = lsol_loc;
1707:     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1708:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq);
1709:   }
1710:   PetscLogFlops(mumps->id.RINFO(2));
1711:   return 0;
1712: }
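/*
   Illustrative sketch (not part of this file): callers can detect the failure modes recorded in
   F->factorerrortype above without inspecting MUMPS INFOG values directly. Assumes F has just been
   numerically factored by the routine above:

     MatFactorError err;
     MatFactorGetError(F, &err);
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) PetscPrintf(PETSC_COMM_WORLD, "matrix is numerically singular\n");
     else if (err == MAT_FACTOR_OUTMEMORY) PetscPrintf(PETSC_COMM_WORLD, "out of memory; try a larger -mat_mumps_icntl_14\n");
*/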

1714: /* Sets MUMPS options from the options database */
1715: PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1716: {
1717:   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1718:   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1719:   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1720:   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1721:   MumpsScalar  *arr;

1723:   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1724:   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1725:     PetscInt nthreads   = 0;
1726:     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1727:     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;

1729:     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1730:     MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size);
1731:     MPI_Comm_rank(mumps->petsc_comm, &mumps->myid); /* "if (!myid)" still works even if mumps_comm is different */

1733:     PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support);
1734:     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1735:     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1736:     PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL);
1737:     if (mumps->use_petsc_omp_support) {
1739:                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1741: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1742:       PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl);
1743:       PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master);
1744: #endif
1745:     } else {
1746:       mumps->omp_comm      = PETSC_COMM_SELF;
1747:       mumps->mumps_comm    = mumps->petsc_comm;
1748:       mumps->is_omp_master = PETSC_TRUE;
1749:     }
1750:     MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size);
1751:     mumps->reqs = NULL;
1752:     mumps->tag  = 0;

1754:     if (mumps->mumps_comm != MPI_COMM_NULL) {
1755:       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1756:         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1757:         MPI_Comm comm;
1758:         MPI_Comm_dup(mumps->mumps_comm, &comm);
1759:         mumps->mumps_comm = comm;
1760:       } else PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm);
1761:     }

1763:     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1764:     mumps->id.job          = JOB_INIT;
1765:     mumps->id.par          = 1; /* host participates in factorization and solve */
1766:     mumps->id.sym          = mumps->sym;

1768:     size          = mumps->id.size_schur;
1769:     arr           = mumps->id.schur;
1770:     listvar_schur = mumps->id.listvar_schur;
1771:     PetscMUMPS_c(mumps);
1773:     /* restore cached ICNTL and CNTL values */
1774:     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1775:     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1776:     PetscFree(mumps->ICNTL_pre);
1777:     PetscFree(mumps->CNTL_pre);

1779:     if (schur) {
1780:       mumps->id.size_schur    = size;
1781:       mumps->id.schur_lld     = size;
1782:       mumps->id.schur         = arr;
1783:       mumps->id.listvar_schur = listvar_schur;
1784:       if (mumps->petsc_size > 1) {
1785:         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

1787:         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1788:         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1789:         MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm);
1791:       } else {
1792:         if (F->factortype == MAT_FACTOR_LU) {
1793:           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1794:         } else {
1795:           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1796:         }
1797:       }
1798:       mumps->id.ICNTL(26) = -1;
1799:     }

1801:     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1802:        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1803:      */
1804:     MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm);
1805:     MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm);

1807:     mumps->scat_rhs = NULL;
1808:     mumps->scat_sol = NULL;

1810:     /* set PETSc-MUMPS default options - override MUMPS default */
1811:     mumps->id.ICNTL(3) = 0;
1812:     mumps->id.ICNTL(4) = 0;
1813:     if (mumps->petsc_size == 1) {
1814:       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1815:       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1816:     } else {
1817:       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1818:       mumps->id.ICNTL(21) = 1; /* distributed solution */
1819:     }
1820:   }
1821:   PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg);
1822:   if (flg) mumps->id.ICNTL(1) = icntl;
1823:   PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg);
1824:   if (flg) mumps->id.ICNTL(2) = icntl;
1825:   PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg);
1826:   if (flg) mumps->id.ICNTL(3) = icntl;

1828:   PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg);
1829:   if (flg) mumps->id.ICNTL(4) = icntl;
1830:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

1832:   PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg);
1833:   if (flg) mumps->id.ICNTL(6) = icntl;

1835:   PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg);
1836:   if (flg) {
1838:     mumps->id.ICNTL(7) = icntl;
1839:   }

1841:   PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL);
1842:   /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1843:   PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL);
1844:   PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL);
1845:   PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL);
1846:   PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL);
1847:   PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL);
1848:   MatGetBlockSizes(A, &rbs, &cbs);
1849:   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1850:   PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg);
1851:   if (flg) {
1854:   }
1855:   PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL);
1856:   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1857:     MatDestroy(&F->schur);
1858:     MatMumpsResetSchur_Private(mumps);
1859:   }

1861:   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1862:      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1863:      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1864:      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1865:      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1866:      In short, we could not use distributed RHS with MPICH until v4.0b1.
1867:    */
1868: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1869:   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1870: #else
1871:   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1872: #endif
1873:   PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg);
1875: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1877: #endif
1878:   /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */

1880:   PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL);
1881:   PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (in MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL);
1882:   PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL);
1883:   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }

1885:   PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL);
1886:   PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL);
1887:   PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL);
1888:   PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL);
1889:   PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL);
1890:   /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1891:   PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL);
1892:   /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);  -- not supported by PETSc API */
1893:   PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL);
1894:   PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL);
1895:   PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL);
1896:   PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL);

1898:   PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL);
1899:   PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL);
1900:   PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL);
1901:   PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL);
1902:   PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL);
1903:   PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL);

1905:   PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);

1907:   PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL);
1908:   if (ninfo) {
1910:     PetscMalloc1(ninfo, &mumps->info);
1911:     mumps->ninfo = ninfo;
1912:     for (i = 0; i < ninfo; i++) {
1914:       mumps->info[i] = info[i];
1915:     }
1916:   }
1917:   PetscOptionsEnd();
1918:   return 0;
1919: }
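/*
   Illustrative command-line usage of the options read above (the values are only examples):

     -pc_type lu -pc_factor_mat_solver_type mumps
     -mat_mumps_icntl_4 2              increase MUMPS verbosity
     -mat_mumps_icntl_14 50            50% increase of the estimated working space
     -mat_mumps_icntl_24 1             null-pivot detection (also forces ICNTL(13)=1, see above)
     -mat_mumps_cntl_3 1e-12           absolute pivoting threshold

   The options are read with the factor's prefix, so with a prefixed solver one would prepend it,
   e.g. -fieldsplit_0_mat_mumps_icntl_14 50.
*/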

1921: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1922: {
1923:   if (mumps->id.INFOG(1) < 0) {
1925:     if (mumps->id.INFOG(1) == -6) {
1926:       PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1927:       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1928:     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1929:       PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1930:       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1931:     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1932:       PetscInfo(F, "Empty matrix\n");
1933:     } else {
1934:       PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1935:       F->factorerrortype = MAT_FACTOR_OTHER;
1936:     }
1937:   }
1938:   return 0;
1939: }

1941: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1942: {
1943:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1944:   Vec            b;
1945:   const PetscInt M = A->rmap->N;

1947:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1948:     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1949:     return 0;
1950:   }

1952:   /* Set MUMPS options from the options database */
1953:   MatSetFromOptions_MUMPS(F, A);

1955:   (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1956:   MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);

1958:   /* analysis phase */
1959:   /*----------------*/
1960:   mumps->id.job = JOB_FACTSYMBOLIC;
1961:   mumps->id.n   = M;
1962:   switch (mumps->id.ICNTL(18)) {
1963:   case 0: /* centralized assembled matrix input */
1964:     if (!mumps->myid) {
1965:       mumps->id.nnz = mumps->nnz;
1966:       mumps->id.irn = mumps->irn;
1967:       mumps->id.jcn = mumps->jcn;
1968:       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1969:       if (r) {
1970:         mumps->id.ICNTL(7) = 1;
1971:         if (!mumps->myid) {
1972:           const PetscInt *idx;
1973:           PetscInt        i;

1975:           PetscMalloc1(M, &mumps->id.perm_in);
1976:           ISGetIndices(r, &idx);
1977:           for (i = 0; i < M; i++) PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
1978:           ISRestoreIndices(r, &idx);
1979:         }
1980:       }
1981:     }
1982:     break;
1983:   case 3: /* distributed assembled matrix input (size>1) */
1984:     mumps->id.nnz_loc = mumps->nnz;
1985:     mumps->id.irn_loc = mumps->irn;
1986:     mumps->id.jcn_loc = mumps->jcn;
1987:     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
1988:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1989:       MatCreateVecs(A, NULL, &b);
1990:       VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
1991:       VecDestroy(&b);
1992:     }
1993:     break;
1994:   }
1995:   PetscMUMPS_c(mumps);
1996:   MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);

1998:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
1999:   F->ops->solve             = MatSolve_MUMPS;
2000:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2001:   F->ops->matsolve          = MatMatSolve_MUMPS;
2002:   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2003:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2005:   mumps->matstruc = SAME_NONZERO_PATTERN;
2006:   return 0;
2007: }
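/*
   Illustrative end-to-end usage of the LU path above (not part of this file). Assumes an assembled AIJ
   matrix A and vectors b, x with compatible layouts; the NULL orderings are assumed acceptable and let
   MUMPS pick its own ordering, while a non-NULL r would be forwarded to MUMPS as a user permutation
   (ICNTL(7)=1), as coded above:

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatLUFactorSymbolic(F, A, NULL, NULL, &info);      calls MatLUFactorSymbolic_AIJMUMPS() above
     MatLUFactorNumeric(F, A, &info);                   calls MatFactorNumeric_MUMPS()
     MatSolve(F, b, x);
     MatDestroy(&F);
*/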

2009: /* Note the Petsc r and c permutations are ignored */
2010: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2011: {
2012:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2013:   Vec            b;
2014:   const PetscInt M = A->rmap->N;

2016:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2017:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2018:     return 0;
2019:   }

2021:   /* Set MUMPS options from the options database */
2022:   MatSetFromOptions_MUMPS(F, A);

2024:   (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2025:   MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);

2027:   /* analysis phase */
2028:   /*----------------*/
2029:   mumps->id.job = JOB_FACTSYMBOLIC;
2030:   mumps->id.n   = M;
2031:   switch (mumps->id.ICNTL(18)) {
2032:   case 0: /* centralized assembled matrix input */
2033:     if (!mumps->myid) {
2034:       mumps->id.nnz = mumps->nnz;
2035:       mumps->id.irn = mumps->irn;
2036:       mumps->id.jcn = mumps->jcn;
2037:       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2038:     }
2039:     break;
2040:   case 3: /* distributed assembled matrix input (size>1) */
2041:     mumps->id.nnz_loc = mumps->nnz;
2042:     mumps->id.irn_loc = mumps->irn;
2043:     mumps->id.jcn_loc = mumps->jcn;
2044:     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2045:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2046:       MatCreateVecs(A, NULL, &b);
2047:       VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
2048:       VecDestroy(&b);
2049:     }
2050:     break;
2051:   }
2052:   PetscMUMPS_c(mumps);
2053:   MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);

2055:   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2056:   F->ops->solve             = MatSolve_MUMPS;
2057:   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2058:   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

2060:   mumps->matstruc = SAME_NONZERO_PATTERN;
2061:   return 0;
2062: }

2064: /* Note the Petsc r permutation and factor info are ignored */
2065: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2066: {
2067:   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2068:   Vec            b;
2069:   const PetscInt M = A->rmap->N;

2071:   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2072:     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2073:     return 0;
2074:   }

2076:   /* Set MUMPS options from the options database */
2077:   MatSetFromOptions_MUMPS(F, A);

2079:   (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2080:   MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);

2082:   /* analysis phase */
2083:   /*----------------*/
2084:   mumps->id.job = JOB_FACTSYMBOLIC;
2085:   mumps->id.n   = M;
2086:   switch (mumps->id.ICNTL(18)) {
2087:   case 0: /* centralized assembled matrix input */
2088:     if (!mumps->myid) {
2089:       mumps->id.nnz = mumps->nnz;
2090:       mumps->id.irn = mumps->irn;
2091:       mumps->id.jcn = mumps->jcn;
2092:       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2093:     }
2094:     break;
2095:   case 3: /* distributed assembled matrix input (size>1) */
2096:     mumps->id.nnz_loc = mumps->nnz;
2097:     mumps->id.irn_loc = mumps->irn;
2098:     mumps->id.jcn_loc = mumps->jcn;
2099:     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2100:     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2101:       MatCreateVecs(A, NULL, &b);
2102:       VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
2103:       VecDestroy(&b);
2104:     }
2105:     break;
2106:   }
2107:   PetscMUMPS_c(mumps);
2108:   MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);

2110:   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2111:   F->ops->solve                 = MatSolve_MUMPS;
2112:   F->ops->solvetranspose        = MatSolve_MUMPS;
2113:   F->ops->matsolve              = MatMatSolve_MUMPS;
2114:   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2115:   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2116: #if defined(PETSC_USE_COMPLEX)
2117:   F->ops->getinertia = NULL;
2118: #else
2119:   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2120: #endif

2122:   mumps->matstruc = SAME_NONZERO_PATTERN;
2123:   return 0;
2124: }
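/*
   Illustrative sketch (not part of this file): the Cholesky path above is usually reached through KSP/PC
   rather than by calling the factorization routines directly. Assumes a symmetric matrix A, vectors b and x,
   and a KSP ksp already created on the same communicator:

     PC pc;
     KSPSetOperators(ksp, A, A);
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCCHOLESKY);
     PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp, b, x);
*/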

2126: PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2127: {
2128:   PetscBool         iascii;
2129:   PetscViewerFormat format;
2130:   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

2132:   /* check if matrix is mumps type */
2133:   if (A->ops->solve != MatSolve_MUMPS) return 0;

2135:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
2136:   if (iascii) {
2137:     PetscViewerGetFormat(viewer, &format);
2138:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2139:       PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n");
2140:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2141:         PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym);
2142:         PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par);
2143:         PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1));
2144:         PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2));
2145:         PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3));
2146:         PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4));
2147:         PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5));
2148:         PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6));
2149:         PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7));
2150:         PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8));
2151:         PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10));
2152:         PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11));
2153:         if (mumps->id.ICNTL(11) > 0) {
2154:           PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4));
2155:           PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5));
2156:           PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6));
2157:           PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8));
2158:           PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9));
2159:           PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11));
2160:         }
2161:         PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12));
2162:         PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13));
2163:         PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14));
2164:         PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15));
2165:         /* ICNTL(15-17) not used */
2166:         PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18));
2167:         PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19));
2168:         PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20));
2169:         PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21));
2170:         PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22));
2171:         PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23));

2173:         PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24));
2174:         PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25));
2175:         PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26));
2176:         PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27));
2177:         PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28));
2178:         PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29));

2180:         PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30));
2181:         PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors are discarded in the solve phase):   %d\n", mumps->id.ICNTL(31));
2182:         PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33));
2183:         PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35));
2184:         PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36));
2185:         PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38));

2187:         PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1));
2188:         PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2));
2189:         PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3));
2190:         PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4));
2191:         PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5));
2192:         PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7));

2194:         /* information local to each processor */
2195:         PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n");
2196:         PetscViewerASCIIPushSynchronized(viewer);
2197:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1));
2198:         PetscViewerFlush(viewer);
2199:         PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n");
2200:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2));
2201:         PetscViewerFlush(viewer);
2202:         PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n");
2203:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3));
2204:         PetscViewerFlush(viewer);

2206:         PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n");
2207:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15));
2208:         PetscViewerFlush(viewer);

2210:         PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n");
2211:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16));
2212:         PetscViewerFlush(viewer);

2214:         PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n");
2215:         PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23));
2216:         PetscViewerFlush(viewer);

2218:         if (mumps->ninfo && mumps->ninfo <= 80) {
2219:           PetscInt i;
2220:           for (i = 0; i < mumps->ninfo; i++) {
2221:             PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]);
2222:             PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]));
2223:             PetscViewerFlush(viewer);
2224:           }
2225:         }
2226:         PetscViewerASCIIPopSynchronized(viewer);
2227:       } else PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "");

2229:       if (mumps->myid == 0) { /* information from the host */
2230:         PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1));
2231:         PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2));
2232:         PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3));
2233:         PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34));

2235:         PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3));
2236:         PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4));
2237:         PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5));
2238:         PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6));
2239:         PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7));
2240:         PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8));
2241:         PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9));
2242:         PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10));
2243:         PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11));
2244:         PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12));
2245:         PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13));
2246:         PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14));
2247:         PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15));
2248:         PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16));
2249:         PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17));
2250:         PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18));
2251:         PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19));
2252:         PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20));
2253:         PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21));
2254:         PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22));
2255:         PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23));
2256:         PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24));
2257:         PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25));
2258:         PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28));
2259:         PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29));
2260:         PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31));
2261:         PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32));
2262:         PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33));
2263:         PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34));
2264:         PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35));
2265:         PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36));
2266:         PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37));
2267:         PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38));
2268:         PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39));
2269:       }
2270:     }
2271:   }
2272:   return 0;
2273: }
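/*
   Illustrative ways to trigger the output above (not part of this file):

     -ksp_view                          summary printed through the PC view of the factored matrix
     -ksp_view ::ascii_info_detail      adds the per-process ICNTL/CNTL/RINFO/INFO listing, as hinted above

   or programmatically on the factored matrix F:

     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL);
     MatView(F, PETSC_VIEWER_STDOUT_WORLD);
     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/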

2275: PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2276: {
2277:   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

2279:   info->block_size        = 1.0;
2280:   info->nz_allocated      = mumps->id.INFOG(20);
2281:   info->nz_used           = mumps->id.INFOG(20);
2282:   info->nz_unneeded       = 0.0;
2283:   info->assemblies        = 0.0;
2284:   info->mallocs           = 0.0;
2285:   info->memory            = 0.0;
2286:   info->fill_ratio_given  = 0;
2287:   info->fill_ratio_needed = 0;
2288:   info->factor_mallocs    = 0;
2289:   return 0;
2290: }

2292: /* -------------------------------------------------------------------------------------------*/
2293: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2294: {
2295:   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2296:   const PetscScalar *arr;
2297:   const PetscInt    *idxs;
2298:   PetscInt           size, i;

2300:   ISGetLocalSize(is, &size);
2301:   /* Schur complement matrix */
2302:   MatDestroy(&F->schur);
2303:   MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur);
2304:   MatDenseGetArrayRead(F->schur, &arr);
2305:   mumps->id.schur      = (MumpsScalar *)arr;
2306:   mumps->id.size_schur = size;
2307:   mumps->id.schur_lld  = size;
2308:   MatDenseRestoreArrayRead(F->schur, &arr);
2309:   if (mumps->sym == 1) MatSetOption(F->schur, MAT_SPD, PETSC_TRUE);

2311:   /* MUMPS expects Fortran style indices */
2312:   PetscFree(mumps->id.listvar_schur);
2313:   PetscMalloc1(size, &mumps->id.listvar_schur);
2314:   ISGetIndices(is, &idxs);
2315:   for (i = 0; i < size; i++) PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]));
2316:   ISRestoreIndices(is, &idxs);
2317:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2318:   mumps->id.ICNTL(26) = -1;
2319:   return 0;
2320: }
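/*
   Illustrative Schur-complement workflow (not part of this file; A, is_schur and error checking are
   assumed, and the status output argument of MatFactorCreateSchurComplement() is an assumption of this
   sketch). The IS must be attached before the symbolic factorization so that ICNTL(19)/ICNTL(26) get
   configured in MatSetFromOptions_MUMPS():

     Mat                  F, S;
     MatFactorInfo        info;
     MatFactorSchurStatus status;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatFactorSetSchurIS(F, is_schur);                  dispatches to the routine above
     MatLUFactorSymbolic(F, A, NULL, NULL, &info);
     MatLUFactorNumeric(F, A, &info);
     MatFactorCreateSchurComplement(F, &S, &status);    copies id.schur into a dense matrix, see below
*/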

2322: /* -------------------------------------------------------------------------------------------*/
2323: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2324: {
2325:   Mat          St;
2326:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2327:   PetscScalar *array;
2328: #if defined(PETSC_USE_COMPLEX)
2329:   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2330: #endif

2333:   MatCreate(PETSC_COMM_SELF, &St);
2334:   MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur);
2335:   MatSetType(St, MATDENSE);
2336:   MatSetUp(St);
2337:   MatDenseGetArray(St, &array);
2338:   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2339:     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2340:       PetscInt i, j, N = mumps->id.size_schur;
2341:       for (i = 0; i < N; i++) {
2342:         for (j = 0; j < N; j++) {
2343: #if !defined(PETSC_USE_COMPLEX)
2344:           PetscScalar val = mumps->id.schur[i * N + j];
2345: #else
2346:           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2347: #endif
2348:           array[j * N + i] = val;
2349:         }
2350:       }
2351:     } else { /* stored by columns */
2352:       PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur);
2353:     }
2354:   } else {                          /* either full or lower-triangular (not packed) */
2355:     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2356:       PetscInt i, j, N = mumps->id.size_schur;
2357:       for (i = 0; i < N; i++) {
2358:         for (j = i; j < N; j++) {
2359: #if !defined(PETSC_USE_COMPLEX)
2360:           PetscScalar val = mumps->id.schur[i * N + j];
2361: #else
2362:           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2363: #endif
2364:           array[i * N + j] = val;
2365:           array[j * N + i] = val;
2366:         }
2367:       }
2368:     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2369:       PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur);
2370:     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2371:       PetscInt i, j, N = mumps->id.size_schur;
2372:       for (i = 0; i < N; i++) {
2373:         for (j = 0; j < i + 1; j++) {
2374: #if !defined(PETSC_USE_COMPLEX)
2375:           PetscScalar val = mumps->id.schur[i * N + j];
2376: #else
2377:           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2378: #endif
2379:           array[i * N + j] = val;
2380:           array[j * N + i] = val;
2381:         }
2382:       }
2383:     }
2384:   }
2385:   MatDenseRestoreArray(St, &array);
2386:   *S = St;
2387:   return 0;
2388: }

2390: /* -------------------------------------------------------------------------------------------*/
2391: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2392: {
2393:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2395:   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2396:     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2397:     for (i = 0; i < nICNTL_pre; ++i)
2398:       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2399:     if (i == nICNTL_pre) {                             /* not already cached */
2400:       if (i > 0) PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre);
2401:       else PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre);
2402:       mumps->ICNTL_pre[0]++;
2403:     }
2404:     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2405:     PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i);
2406:   } else PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl));
2407:   return 0;
2408: }
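/*
   Layout of the ICNTL_pre cache used above (CNTL_pre is analogous, with PetscReal values):

     ICNTL_pre[0]           number of cached (icntl, ival) pairs
     ICNTL_pre[1 + 2*i]     ICNTL index of the i-th cached pair
     ICNTL_pre[2 + 2*i]     value to install once JOB_INIT has run

   For example, caching ICNTL(24)=1 and then ICNTL(14)=50 before the first PetscMUMPS_c() call leaves
   ICNTL_pre = {2, 24, 1, 14, 50}; MatSetFromOptions_MUMPS() replays these right after JOB_INIT and then
   frees the array.
*/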

2410: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2411: {
2412:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2414:   *ival = mumps->id.ICNTL(icntl);
2415:   return 0;
2416: }

2418: /*@
2419:   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

2421:    Logically Collective

2423:    Input Parameters:
2424: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2425: .  icntl - index of MUMPS parameter array ICNTL()
2426: -  ival - value of MUMPS ICNTL(icntl)

2428:   Options Database Key:
2429: .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival

2431:    Level: beginner

2433:    References:
2434: .  * - MUMPS Users' Guide

2436: .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2437: @*/
2438: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2439: {
2445:   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2446:   return 0;
2447: }
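/* A hedged usage sketch (editor-added): setting and reading back MUMPS integer controls on a factor
   matrix F obtained from MatGetFactor(); the particular values below are illustrative, not recommendations.

     PetscInt icntl24;
     PetscCall(MatMumpsSetIcntl(F, 24, 1));        // ICNTL(24): turn on detection of null pivot rows
     PetscCall(MatMumpsSetIcntl(F, 14, 30));       // ICNTL(14): allow 30% extra estimated working space
     PetscCall(MatMumpsGetIcntl(F, 24, &icntl24)); // meaningful once MUMPS has been initialized/factored
   The same settings are available from the command line as -mat_mumps_icntl_24 1 -mat_mumps_icntl_14 30
   (with the PC's options prefix, if any).
*/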
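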

2449: /*@
2450:   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

2452:    Logically Collective

2454:    Input Parameters:
2455: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2456: -  icntl - index of MUMPS parameter array ICNTL()

2458:   Output Parameter:
2459: .  ival - value of MUMPS ICNTL(icntl)

2461:    Level: beginner

2463:    References:
2464: .  * - MUMPS Users' Guide

2466: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2467: @*/
2468: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2469: {
2475:   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2476:   return 0;
2477: }

2479: /* -------------------------------------------------------------------------------------------*/
2480: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2481: {
2482:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2484:   if (mumps->id.job == JOB_NULL) {
2485:     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2486:     for (i = 0; i < nCNTL_pre; ++i)
2487:       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2488:     if (i == nCNTL_pre) {
2489:       if (i > 0) PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre);
2490:       else PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre);
2491:       mumps->CNTL_pre[0]++;
2492:     }
2493:     mumps->CNTL_pre[1 + 2 * i] = icntl;
2494:     mumps->CNTL_pre[2 + 2 * i] = val;
2495:   } else mumps->id.CNTL(icntl) = val;
2496:   return 0;
2497: }

2499: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2500: {
2501:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2503:   *val = mumps->id.CNTL(icntl);
2504:   return 0;
2505: }

2507: /*@
2508:   MatMumpsSetCntl - Set MUMPS parameter CNTL()

2510:    Logically Collective

2512:    Input Parameters:
2513: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2514: .  icntl - index of MUMPS parameter array CNTL()
2515: -  val - value of MUMPS CNTL(icntl)

2517:   Options Database Key:
2518: .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val

2520:    Level: beginner

2522:    References:
2523: .  * - MUMPS Users' Guide

2525: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2526: @*/
2527: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2528: {
2534:   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2535:   return 0;
2536: }

2538: /*@
2539:   MatMumpsGetCntl - Get MUMPS parameter CNTL()

2541:    Logically Collective

2543:    Input Parameters:
2544: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2545: -  icntl - index of MUMPS parameter array CNTL()

2547:   Output Parameter:
2548: .  val - value of MUMPS CNTL(icntl)

2550:    Level: beginner

2552:    References:
2553: .  * - MUMPS Users' Guide

2555: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2556: @*/
2557: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2558: {
2564:   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2565:   return 0;
2566: }
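/* A hedged usage sketch (editor-added): CNTL entries are real-valued controls; for example CNTL(1)
   is the relative pivoting threshold. F is assumed to be a MUMPS factor matrix.

     PetscReal cntl1;
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));   // CNTL(1): lower the relative pivoting threshold
     PetscCall(MatMumpsGetCntl(F, 1, &cntl1)); // query the value MUMPS will use / has used
   The command-line equivalent is -mat_mumps_cntl_1 0.01 (with the PC's options prefix, if any).
*/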

2568: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2569: {
2570:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2572:   *info = mumps->id.INFO(icntl);
2573:   return 0;
2574: }

2576: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2577: {
2578:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2580:   *infog = mumps->id.INFOG(icntl);
2581:   return 0;
2582: }

2584: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2585: {
2586:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2588:   *rinfo = mumps->id.RINFO(icntl);
2589:   return 0;
2590: }

2592: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2593: {
2594:   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

2596:   *rinfog = mumps->id.RINFOG(icntl);
2597:   return 0;
2598: }

2600: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2601: {
2602:   Mat          Bt = NULL, Btseq = NULL;
2603:   PetscBool    flg;
2604:   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2605:   PetscScalar *aa;
2606:   PetscInt     spnr, *ia, *ja, M, nrhs;

2609:   PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg);
2610:   if (flg) {
2611:     MatTransposeGetMat(spRHS, &Bt);
2612:   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");

2614:   MatMumpsSetIcntl(F, 30, 1);

2616:   if (mumps->petsc_size > 1) {
2617:     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2618:     Btseq         = b->A;
2619:   } else {
2620:     Btseq = Bt;
2621:   }

2623:   MatGetSize(spRHS, &M, &nrhs);
2624:   mumps->id.nrhs = nrhs;
2625:   mumps->id.lrhs = M;
2626:   mumps->id.rhs  = NULL;

2628:   if (!mumps->myid) {
2629:     MatSeqAIJGetArray(Btseq, &aa);
2630:     MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
2632:     PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
2633:     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2634:   } else {
2635:     mumps->id.irhs_ptr    = NULL;
2636:     mumps->id.irhs_sparse = NULL;
2637:     mumps->id.nz_rhs      = 0;
2638:     mumps->id.rhs_sparse  = NULL;
2639:   }
2640:   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2641:   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

2643:   /* solve phase */
2644:   /*-------------*/
2645:   mumps->id.job = JOB_SOLVE;
2646:   PetscMUMPS_c(mumps);

2649:   if (!mumps->myid) {
2650:     MatSeqAIJRestoreArray(Btseq, &aa);
2651:     MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
2653:   }
2654:   return 0;
2655: }

2657: /*@
2658:   MatMumpsGetInverse - Get a user-specified set of entries in the inverse of A

2660:    Logically Collective

2662:    Input Parameters:
2663: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2664: -  spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the specified indices on process 0

2666:   Output Parameter:
2667: . spRHS - requested entries of inverse of A

2669:    Level: beginner

2671:    References:
2672: .  * - MUMPS Users' Guide

2674: .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2675: @*/
2676: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2677: {
2680:   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2681:   return 0;
2682: }
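/* A hedged usage sketch (editor-added): requesting selected entries of inv(A) from a MUMPS factor F.
   spRHST is assumed to be an AIJ matrix (with its nonzeros on rank 0) whose sparsity pattern marks the
   wanted entries of inv(A)^T; how it is assembled is application-specific and omitted here.

     Mat spRHS;
     PetscCall(MatCreateTranspose(spRHST, &spRHS)); // wrap as MATTRANSPOSEVIRTUAL, as required by MatMumpsGetInverse()
     PetscCall(MatMumpsGetInverse(F, spRHS));       // the requested entries are returned in place of the pattern
     PetscCall(MatDestroy(&spRHS));
   Alternatively, pass spRHST directly to MatMumpsGetInverseTranspose(), shown below.
*/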

2684: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2685: {
2686:   Mat spRHS;

2688:   MatCreateTranspose(spRHST, &spRHS);
2689:   MatMumpsGetInverse_MUMPS(F, spRHS);
2690:   MatDestroy(&spRHS);
2691:   return 0;
2692: }

2694: /*@
2695:   MatMumpsGetInverseTranspose - Get a user-specified set of entries in the inverse of matrix A^T

2697:    Logically Collective

2699:    Input Parameters:
2700: +  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2701: -  spRHST - sequential sparse matrix in `MATAIJ` format holding the specified indices of A^T on process 0

2703:   Output Parameter:
2704: . spRHST - requested entries of inverse of A^T

2706:    Level: beginner

2708:    References:
2709: .  * - MUMPS Users' Guide

2711: .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2712: @*/
2713: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2714: {
2715:   PetscBool flg;

2719:   PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL);

2722:   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2723:   return 0;
2724: }
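/* A hedged usage sketch (editor-added): with the AIJ pattern matrix spRHST itself (no transpose wrapper),

     PetscCall(MatMumpsGetInverseTranspose(F, spRHST)); // fills the requested entries of inv(A^T)
*/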

2726: /*@
2727:   MatMumpsGetInfo - Get MUMPS parameter INFO()

2729:    Logically Collective

2731:    Input Parameters:
2732: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2733: -  icntl - index of MUMPS parameter array INFO()

2735:   Output Parameter:
2736: .  ival - value of MUMPS INFO(icntl)

2738:    Level: beginner

2740:    References:
2741: .  * - MUMPS Users' Guide

2743: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2744: @*/
2745: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2746: {
2750:   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2751:   return 0;
2752: }

2754: /*@
2755:   MatMumpsGetInfog - Get MUMPS parameter INFOG()

2757:    Logically Collective

2759:    Input Parameters:
2760: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2761: -  icntl - index of MUMPS parameter array INFOG()

2763:   Output Parameter:
2764: .  ival - value of MUMPS INFOG(icntl)

2766:    Level: beginner

2768:    References:
2769: .  * - MUMPS Users' Guide

2771: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2772: @*/
2773: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2774: {
2778:   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2779:   return 0;
2780: }

2782: /*@
2783:   MatMumpsGetRinfo - Get MUMPS parameter RINFO()

2785:    Logically Collective

2787:    Input Parameters:
2788: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2789: -  icntl - index of MUMPS parameter array RINFO()

2791:   Output Parameter:
2792: .  val - value of MUMPS RINFO(icntl)

2794:    Level: beginner

2796:    References:
2797: .  * - MUMPS Users' Guide

2799: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2800: @*/
2801: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2802: {
2806:   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2807:   return 0;
2808: }

2810: /*@
2811:   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

2813:    Logically Collective

2815:    Input Parameters:
2816: +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2817: -  icntl - index of MUMPS parameter array RINFOG()

2819:   Output Parameter:
2820: .  val - value of MUMPS RINFOG(icntl)

2822:    Level: beginner

2824:    References:
2825: .  * - MUMPS Users' Guide

2827: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2828: @*/
2829: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2830: {
2834:   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2835:   return 0;
2836: }
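/* A hedged usage sketch (editor-added): the INFO/INFOG/RINFO/RINFOG getters are typically used for
   diagnostics after a (possibly failed) factorization; F is assumed to be a MUMPS factor matrix.
   INFOG(1) < 0 indicates an error; for example INFOG(1) = -9 together with INFOG(2) reports how much
   additional workspace was needed.

     PetscInt  infog1, infog2;
     PetscReal rinfog3;
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));   // global error/status flag
     PetscCall(MatMumpsGetInfog(F, 2, &infog2));   // additional information about the error
     PetscCall(MatMumpsGetRinfog(F, 3, &rinfog3)); // e.g. total flops of the elimination
*/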

2838: /*MC
2839:   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2840:   distributed and sequential matrices via the external package MUMPS.

2842:   Works with `MATAIJ` and `MATSBAIJ` matrices

2844:   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

2846:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode. See details below.

2848:   Use -pc_type lu or -pc_type cholesky together with -pc_factor_mat_solver_type mumps to use this direct solver

2850:   Options Database Keys:
2851: +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2852: .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
2853: .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2854: .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2855: .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
2856: .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2857:                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2858: .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2859: .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2860: .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2861: .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2862: .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2863: .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2864: .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2865: .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2866: .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2867: .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2868: .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (in MB) that MUMPS can allocate per processor
2869: .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2870: .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2871: .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2872: .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2873: .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2874: .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2875: .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2876: .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2877: .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2878: .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2879: .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2880: .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2881: .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2882: .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2883: .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2884: .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2885: .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2886: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2887:                                    If m is omitted, it defaults to the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.

2889:   Level: beginner

2891:     Notes:
2892:     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.

2894:     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2895:     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

2897:     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
2898: $          KSPGetPC(ksp,&pc);
2899: $          PCFactorGetMatrix(pc,&mat);
2900: $          MatMumpsGetInfo(mat,....);
2901: $          MatMumpsGetInfog(mat,....); etc.
2902:            Alternatively, run with -ksp_error_if_not_converged; the program will then stop and print the information in the error message.

2904:   Using MUMPS with 64-bit integers
2905:     MUMPS provides 64-bit integer support in two build modes:
2906:       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2907:       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

2909:       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2910:       MUMPS stores column indices in 32-bit but row offsets in 64-bit, so you can have a huge number of nonzeros but must have fewer than 2^31 rows and
2911:       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2912:       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

2914:     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both the --with-64-bit-indices=0 and --with-64-bit-indices=1 variants of PETSc.

2916:   Two modes to run MUMPS/PETSc with OpenMP
2917: $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2918: $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

2920: $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2921: $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

2923:    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2924:    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2925:    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2926:    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2927:    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

2929:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2930:    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2931:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2932:    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (they might be set
2933:    by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2934:    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets:
2935:    if you interleave MPI ranks across the two sockets, that is, even ranks on socket 0 and odd ranks on socket 1, and bind
2936:    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
2937:    cores in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
2938:    problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2939:    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2940:    examine the mapping result.

2942:    PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2943:    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
2944:    calls `omp_set_num_threads`(m) internally before calling MUMPS.

2946:    References:
2947: +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2948: -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

2950: .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2951: M*/
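/* A hedged usage sketch (editor-added): selecting MUMPS inside a KSP/PC solve programmatically; ksp and a
   Mat variable F are assumed to exist. The command-line equivalent is
   -pc_type lu -pc_factor_mat_solver_type mumps (plus any -mat_mumps_* options).

     PC pc;
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                          // or PCCHOLESKY for symmetric/SPD problems
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); // dispatch the factorization to MUMPS
     PetscCall(PCFactorSetUpMatSolverType(pc));               // create the factor matrix now (optional)
     PetscCall(PCFactorGetMatrix(pc, &F));                    // so MatMumpsSetIcntl()/MatMumpsSetCntl() can be called on F
*/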

2953: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
2954: {
2955:   *type = MATSOLVERMUMPS;
2956:   return 0;
2957: }

2959: /* MatGetFactor for Seq and MPI AIJ matrices */
2960: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
2961: {
2962:   Mat         B;
2963:   Mat_MUMPS  *mumps;
2964:   PetscBool   isSeqAIJ;
2965:   PetscMPIInt size;

2967: #if defined(PETSC_USE_COMPLEX)
2969: #endif
2970:   /* Create the factorization matrix */
2971:   PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ);
2972:   MatCreate(PetscObjectComm((PetscObject)A), &B);
2973:   MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
2974:   PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
2975:   MatSetUp(B);

2977:   PetscNew(&mumps);

2979:   B->ops->view    = MatView_MUMPS;
2980:   B->ops->getinfo = MatGetInfo_MUMPS;

2982:   PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
2983:   PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
2984:   PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
2985:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
2986:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
2987:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
2988:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
2989:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
2990:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
2991:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
2992:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
2993:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
2994:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);

2996:   if (ftype == MAT_FACTOR_LU) {
2997:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
2998:     B->factortype            = MAT_FACTOR_LU;
2999:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3000:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3001:     PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3002:     mumps->sym = 0;
3003:   } else {
3004:     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3005:     B->factortype                  = MAT_FACTOR_CHOLESKY;
3006:     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3007:     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3008:     PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
3009: #if defined(PETSC_USE_COMPLEX)
3010:     mumps->sym = 2;
3011: #else
3012:     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3013:     else mumps->sym = 2;
3014: #endif
3015:   }

3017:   /* set solvertype */
3018:   PetscFree(B->solvertype);
3019:   PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3020:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3021:   if (size == 1) {
3022:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3023:     B->canuseordering = PETSC_TRUE;
3024:   }
3025:   B->ops->destroy = MatDestroy_MUMPS;
3026:   B->data         = (void *)mumps;

3028:   *F               = B;
3029:   mumps->id.job    = JOB_NULL;
3030:   mumps->ICNTL_pre = NULL;
3031:   mumps->CNTL_pre  = NULL;
3032:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3033:   return 0;
3034: }

3036: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3037: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3038: {
3039:   Mat         B;
3040:   Mat_MUMPS  *mumps;
3041:   PetscBool   isSeqSBAIJ;
3042:   PetscMPIInt size;

3044: #if defined(PETSC_USE_COMPLEX)
3046: #endif
3047:   MatCreate(PetscObjectComm((PetscObject)A), &B);
3048:   MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3049:   PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3050:   MatSetUp(B);

3052:   PetscNew(&mumps);
3053:   PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ);
3054:   if (isSeqSBAIJ) {
3055:     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3056:   } else {
3057:     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3058:   }

3060:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3061:   B->ops->view                   = MatView_MUMPS;
3062:   B->ops->getinfo                = MatGetInfo_MUMPS;

3064:   PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3065:   PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3066:   PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3067:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3068:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3069:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3070:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3071:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3072:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3073:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3074:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
3075:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
3076:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);

3078:   B->factortype = MAT_FACTOR_CHOLESKY;
3079: #if defined(PETSC_USE_COMPLEX)
3080:   mumps->sym = 2;
3081: #else
3082:   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3083:   else mumps->sym = 2;
3084: #endif

3086:   /* set solvertype */
3087:   PetscFree(B->solvertype);
3088:   PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3089:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3090:   if (size == 1) {
3091:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3092:     B->canuseordering = PETSC_TRUE;
3093:   }
3094:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
3095:   B->ops->destroy = MatDestroy_MUMPS;
3096:   B->data         = (void *)mumps;

3098:   *F               = B;
3099:   mumps->id.job    = JOB_NULL;
3100:   mumps->ICNTL_pre = NULL;
3101:   mumps->CNTL_pre  = NULL;
3102:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3103:   return 0;
3104: }

3106: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3107: {
3108:   Mat         B;
3109:   Mat_MUMPS  *mumps;
3110:   PetscBool   isSeqBAIJ;
3111:   PetscMPIInt size;

3113:   /* Create the factorization matrix */
3114:   PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ);
3115:   MatCreate(PetscObjectComm((PetscObject)A), &B);
3116:   MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3117:   PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3118:   MatSetUp(B);

3120:   PetscNew(&mumps);
3121:   if (ftype == MAT_FACTOR_LU) {
3122:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3123:     B->factortype            = MAT_FACTOR_LU;
3124:     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3125:     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3126:     mumps->sym = 0;
3127:     PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3128:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

3130:   B->ops->view    = MatView_MUMPS;
3131:   B->ops->getinfo = MatGetInfo_MUMPS;

3133:   PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3134:   PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3135:   PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3136:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3137:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3138:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3139:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3140:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3141:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3142:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3143:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
3144:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
3145:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);

3147:   /* set solvertype */
3148:   PetscFree(B->solvertype);
3149:   PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3150:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3151:   if (size == 1) {
3152:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3153:     B->canuseordering = PETSC_TRUE;
3154:   }
3155:   B->ops->destroy = MatDestroy_MUMPS;
3156:   B->data         = (void *)mumps;

3158:   *F               = B;
3159:   mumps->id.job    = JOB_NULL;
3160:   mumps->ICNTL_pre = NULL;
3161:   mumps->CNTL_pre  = NULL;
3162:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3163:   return 0;
3164: }

3166: /* MatGetFactor for Seq and MPI SELL matrices */
3167: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3168: {
3169:   Mat         B;
3170:   Mat_MUMPS  *mumps;
3171:   PetscBool   isSeqSELL;
3172:   PetscMPIInt size;

3174:   /* Create the factorization matrix */
3175:   PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL);
3176:   MatCreate(PetscObjectComm((PetscObject)A), &B);
3177:   MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3178:   PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3179:   MatSetUp(B);

3181:   PetscNew(&mumps);

3183:   B->ops->view    = MatView_MUMPS;
3184:   B->ops->getinfo = MatGetInfo_MUMPS;

3186:   PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3187:   PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3188:   PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3189:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3190:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3191:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3192:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3193:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3194:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3195:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3196:   PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);

3198:   if (ftype == MAT_FACTOR_LU) {
3199:     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3200:     B->factortype            = MAT_FACTOR_LU;
3201:     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3202:     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3203:     mumps->sym = 0;
3204:     PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3205:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");

3207:   /* set solvertype */
3208:   PetscFree(B->solvertype);
3209:   PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3210:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3211:   if (size == 1) {
3212:     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3213:     B->canuseordering = PETSC_TRUE;
3214:   }
3215:   B->ops->destroy = MatDestroy_MUMPS;
3216:   B->data         = (void *)mumps;

3218:   *F               = B;
3219:   mumps->id.job    = JOB_NULL;
3220:   mumps->ICNTL_pre = NULL;
3221:   mumps->CNTL_pre  = NULL;
3222:   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3223:   return 0;
3224: }

3226: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3227: {
3228:   MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps);
3229:   MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps);
3230:   MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps);
3231:   MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps);
3232:   MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps);
3233:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps);
3234:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps);
3235:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps);
3236:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps);
3237:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps);
3238:   MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps);
3239:   return 0;
3240: }
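/* A hedged sketch (editor-added): the (solver, matrix type, factor type) triples registered above
   determine which MatGetFactor_*_mumps() routine serves a given request, e.g.

     Mat F;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F)); // A is assumed to be MATAIJ or MATSBAIJ
     PetscCall(MatMumpsSetIcntl(F, 13, 1));                               // package options may be set before the symbolic phase
   after which the factor is used through MatCholeskyFactorSymbolic()/MatCholeskyFactorNumeric(), or, more
   commonly, through a PC of type PCCHOLESKY with PCFactorSetMatSolverType(pc, MATSOLVERMUMPS).
*/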